# 02_audio_with_soundfont — Render de batería realista desde Dataset 1
Convierte patrones rítmicos (bitmasks) a **MIDI** y luego a **audio WAV** usando **PrettyMIDI + FluidSynth** y un **SoundFont GM**.  
Ejecuta las celdas en orden (o `Run All`).

In [None]:
# --- Windows: make the FluidSynth DLLs discoverable for pretty_midi ---
import os, sys

FS_BIN = r"C:\tools\fluidsynth\bin"  # Chocolatey install path (adjust if yours differs)
if sys.platform == "win32":
    if os.path.isdir(FS_BIN):
        # add_dll_directory raises when the folder is missing, hence the isdir guard
        if hasattr(os, "add_dll_directory"):
            os.add_dll_directory(FS_BIN)
        # Prepend to PATH only once so re-running the cell does not pile up duplicates
        if FS_BIN not in os.environ.get("PATH", "").split(os.pathsep):
            os.environ["PATH"] = FS_BIN + os.pathsep + os.environ.get("PATH", "")
    else:
        print(f"[WARN] No existe la carpeta de FluidSynth: {FS_BIN}")


In [8]:
import pretty_midi as pm
import soundfile as sf
from midi2audio import FluidSynth

In [9]:
import pretty_midi as pm
import numpy as np

def check_fluidsynth(sf2_path: str):
    """Smoke-test FluidSynth rendering through pretty_midi.

    Builds a three-hit drum pattern (kick, snare, closed hi-hat) and
    synthesizes it at 44100 Hz with the given SoundFont.

    Args:
        sf2_path: Path to the ``.sf2`` SoundFont handed to FluidSynth.

    Returns:
        True when synthesis completes without raising, False otherwise.
        The outcome is also printed.
    """
    # (pitch, start, end) in seconds; end times are spelled out as literals
    test_hits = (
        (36, 0.0, 0.2),  # Kick
        (38, 0.4, 0.6),  # Snare
        (42, 0.8, 1.0),  # Hi-hat
    )
    try:
        song = pm.PrettyMIDI(initial_tempo=100)
        kit = pm.Instrument(program=0, is_drum=True)
        for pitch, start, end in test_hits:
            kit.notes.append(pm.Note(velocity=120, pitch=pitch, start=start, end=end))
        song.instruments.append(kit)
        # Only success/failure matters here; the rendered samples are discarded
        song.fluidsynth(sf2_path=sf2_path, fs=44100)
        print("FluidSynth funciona correctamente. Generado audio de prueba.")
        return True
    except Exception as err:
        print("[ERROR] FluidSynth falló:", err)
        return False

# Change this path to wherever your SoundFont is stored
SF2_PATH = r"C:\SoundFonts\FluidR3_GM.sf2"
# Run the smoke test; its boolean result is the cell's displayed value
check_fluidsynth(SF2_PATH)


FluidSynth funciona correctamente. Generado audio de prueba.


True

In [None]:
import os, pathlib as P, numpy as np, json, time

def find_project_root(start: P.Path) -> P.Path:
    """Locate the project root by searching upwards from *start*.

    The first directory among *start* and its nearest ancestors (ten
    candidates at most) that contains ``models/final`` is returned. If none
    matches and the resolved path ends in ``src/notebooks``, the directory
    three levels above it is returned. Otherwise the resolved *start* itself
    is returned.

    Args:
        start: Directory where the search begins.

    Returns:
        The resolved path chosen as project root.
    """
    origin = P.Path(start).resolve()

    # origin plus up to nine ancestors, nearest first
    for candidate in (origin, *origin.parents)[:10]:
        if (candidate / "models" / "final").exists():
            return candidate

    if origin.parts[-2:] == ("src", "notebooks"):
        ancestors = origin.parents
        if len(ancestors) > 2:
            return ancestors[2]

    return origin

# Resolve the project layout starting from the notebook's working directory
CWD = P.Path.cwd()
PROJECT_ROOT = find_project_root(CWD)
print("PROJECT_ROOT:", PROJECT_ROOT)

# Dataset of the first model (dataset_1)
DATASET_DIR = PROJECT_ROOT / "data" / "processed"
print("DATASET_DIR:", DATASET_DIR)

# EDIT THIS PATH so it points at your .sf2 SoundFont:
SF2_PATH = P.Path(r"C:\SoundFonts\FluidR3_GM.sf2")
# Explicit raise instead of assert: asserts are stripped under `python -O`
if not SF2_PATH.exists():
    raise FileNotFoundError(f"No encuentro el SoundFont: {SF2_PATH}")
print("SF2_PATH:", SF2_PATH)


PROJECT_ROOT: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm
DATASET_DIR: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm\data\processed
SF2_PATH: C:\SoundFonts\FluidR3_GM.sf2


In [11]:
import numpy as np

def _to_list(x):
    # Convierte cada "step" del .npz (dtype=object) a lista de activaciones
    try:
        arr = np.array(x)
        if arr.ndim >= 1 and np.all(np.isin(arr, [0, 1])) and arr.size > 1:
            flat = arr.ravel()
            return [int(i) for i, v in enumerate(flat) if int(v) == 1]
    except Exception:
        pass
    if isinstance(x, (list, tuple, np.ndarray)):
        vals = []
        for v in np.array(x).ravel():
            try:
                iv = int(v)
                if iv != 0: vals.append(iv)
            except Exception:
                continue
        return vals
    try:
        v = int(x); return [v] if v!=0 else []
    except Exception:
        return []

def scan_instrument_vocab_from_bars(bars_obj):
    """Derive the instrument vocabulary from a 2-D array of steps.

    Only the first step, ``bars_obj[0, 0]``, decides the format. When it is a
    multi-element 0/1 vector, the vocabulary is the channel indices
    ``0..size-1``. Otherwise every step is scanned with ``_to_list`` and the
    sorted set of distinct values is returned.

    NOTE(review): if steps are stored as scalars whose decimal digits encode a
    bitmask (e.g. 100101), the fallback treats every distinct pattern as its
    own instrument -- confirm the dataset encoding.
    """
    first_step = bars_obj[0, 0]
    try:
        probe = np.array(first_step)
        if probe.ndim >= 1 and np.all(np.isin(probe, [0, 1])) and probe.size > 1:
            # Multi-hot layout: one channel per vector position
            return list(range(int(probe.size)))
    except Exception:
        pass

    # Fallback: collect every distinct value that appears in any step
    n_bars, n_steps = bars_obj.shape
    found = {
        int(value)
        for bar in range(n_bars)
        for step in range(n_steps)
        for value in _to_list(bars_obj[bar, step])
    }
    return sorted(found)

def bars_to_3d_with_vocab(bars_obj, vocab):
    """Expand a 2-D array of steps into a multi-hot tensor.

    Args:
        bars_obj: 2-D array; each cell is decoded with ``_to_list``.
        vocab: Instrument vocabulary that defines the last axis.

    Returns:
        ``int8`` array of shape ``(bars, steps, len(vocab))`` with a 1 for
        every active instrument. When every vocabulary entry is an integer
        below 32, decoded values are used directly as channel indices
        (out-of-range ones are dropped); otherwise they are looked up in the
        vocabulary and unknown values are dropped.
    """
    column_of = {pitch: col for col, pitch in enumerate(vocab)}
    n_bars, n_steps = bars_obj.shape
    n_inst = len(vocab)
    out = np.zeros((n_bars, n_steps, n_inst), dtype=np.int8)
    index_mode = all(isinstance(p, (int, np.integer)) and p < 32 for p in vocab)

    for bar, row in enumerate(bars_obj):
        for step, cell in enumerate(row):
            active = _to_list(cell)
            if index_mode:
                columns = [j for j in active if 0 <= j < n_inst]
            else:
                columns = [column_of[int(p)] for p in active if int(p) in column_of]
            for col in columns:
                out[bar, step, col] = 1
    return out


In [12]:
bm_path = DATASET_DIR / "train_bitmasks.npz"
# Explicit raise instead of assert: asserts are stripped under `python -O`
if not bm_path.exists():
    raise FileNotFoundError(f"No existe {bm_path}")
# NOTE: allow_pickle=True is needed for object-dtype arrays, but unpickling can
# execute arbitrary code -- only load archives you trust.
# The context manager closes the underlying .npz file handle once read.
with np.load(bm_path, allow_pickle=True) as npz:
    bars = npz["bars"]  # (N,T) dtype=object

vocab_inst = scan_instrument_vocab_from_bars(bars)
print("Instrumentos detectados:", vocab_inst)

BM = bars_to_3d_with_vocab(bars, vocab_inst)
N, T, I = BM.shape
print(f"BM shape: {BM.shape}  (N={N}, T={T}, I={I})")


Instrumentos detectados: [1, 10, 11, 100, 101, 110, 1000, 1001, 1010, 1100, 10000, 10001, 10010, 10100, 11000, 100000, 100001, 100010, 100100, 100101, 100110, 101000, 101100, 110000, 1000000, 1000001, 1000010, 1000011, 1000100, 1000101, 1000110, 1001000, 1001001, 1001100, 1010000, 1010001, 1010100, 1011000, 1011100, 1100000, 1100001, 1100010, 1100011, 1100100, 1100101, 1100110, 1101000, 1101100, 1110000, 10000000, 10000001, 10000010, 10000011, 10000100, 10000101, 10000111, 10001000, 10001001, 10010000, 10010001, 10010100, 10100000, 10100001, 10100100, 10101000, 11000000, 11000001, 11000010, 11000011, 11000100, 11000101, 11000110, 11001000, 11001001, 11010000, 11010100, 11100000, 11100001, 11100010, 11100100, 100000000, 100000001, 100000010, 100000011, 100000100, 100000101, 100000110, 100001000, 100001001, 100001010, 100001011, 100001100, 100001101, 100001110, 100010000, 100010001, 100010010, 100010011, 100010100, 100010101, 100010110, 100011000, 100011001, 100011010, 100100000, 1001000

In [13]:
# General MIDI percussion key numbers grouped by drum-kit role
GM_KICKS = {35, 36}
GM_SNARES = {38, 40}
GM_HHC = {42}  # closed hi-hat
GM_HHO = {46}  # open hi-hat
GM_TOMS = {41, 43, 45, 47, 48, 50}
GM_RIDE = {51, 59}
GM_CRASH = {49, 57}

def infer_roles_from_BM(BM):
    """Guess which channels play the kick, snare and hi-hat roles.

    Heuristic:
      * hi-hat = channel with the highest overall hit density;
      * snare  = among the remaining channels, most hits on beats 2 and 4;
      * kick   = among the remaining channels, most hits on any beat.

    Beat positions are derived from the step count using the same bar-length
    convention as ``multihot_to_pretty_midi``: a bar has ``T`` steps when
    ``T`` is 16, 32 or 64, and 16 steps otherwise. For ``T == 16`` this gives
    beats ``[0, 4, 8, 12]`` and backbeats ``[4, 12]``. Deriving them also
    avoids indexing past the end when ``T`` is smaller than 13.

    Args:
        BM: Multi-hot array of shape ``(N, T, I)``.

    Returns:
        Tuple ``(kd_idx, sn_idx, hh_idx)`` of Python ints. With fewer than
        three channels the roles cannot all be distinct and indices repeat.
    """
    _, T, I = BM.shape

    steps_per_bar = T if T in (16, 32, 64) else 16
    steps_per_beat = steps_per_bar // 4
    beats = np.arange(0, T, steps_per_beat)
    # With four beats per bar, odd beat positions are beats 2 and 4 of each bar
    backbeats = beats[1::2]

    density = BM.mean(axis=(0, 1))
    onbeat = BM[:, beats, :].sum(axis=(0, 1))
    back = BM[:, backbeats, :].sum(axis=(0, 1))

    hh_idx = int(np.argmax(density))
    available = np.ones(I, bool)
    available[hh_idx] = False
    # Counts are >= 0, so -1 keeps already-assigned channels from winning
    sn_idx = int(np.argmax(np.where(available, back, -1)))
    available[sn_idx] = False
    kd_idx = int(np.argmax(np.where(available, onbeat, -1)))
    return kd_idx, sn_idx, hh_idx

kd_idx, sn_idx, hh_idx = infer_roles_from_BM(BM)
print(f"Roles inferidos → KD={kd_idx}, SN={sn_idx}, HH={hh_idx}")

# Everything else: simple rotation over toms / cymbals / percussion pitches
rota = [41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54]
gm_per_index = [0] * I
# Assigned in kick -> snare -> hi-hat order, so a later role wins on a collision
for role_idx, gm_pitch in ((kd_idx, 36), (sn_idx, 38), (hh_idx, 42)):
    if 0 <= role_idx < I:
        gm_per_index[role_idx] = gm_pitch
rest = [j for j in range(I) if j not in (kd_idx, sn_idx, hh_idx)]
for k, j in enumerate(rest):
    gm_per_index[j] = rota[k % len(rota)]
print("GM map por índice:", gm_per_index)


Roles inferidos → KD=80, SN=65, HH=49
GM map por índice: [41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 42, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 38, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 36, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41, 43, 45, 47, 48, 51, 49, 37, 46, 57, 59, 54, 41]


In [14]:
# --- Windows: expose the FluidSynth DLL folder to Python's DLL loader ---
import os, sys, pathlib as P

FS_BIN = r"C:\tools\fluidsynth\bin"  # adjust if you installed it elsewhere
if sys.platform == "win32":
    if os.path.isdir(FS_BIN):
        # 1) For the Python 3.8+ DLL loader (recommended); add_dll_directory
        #    raises when the folder is missing, hence the isdir guard
        if hasattr(os, "add_dll_directory"):
            os.add_dll_directory(FS_BIN)
        # 2) Also prepend to PATH in case other packages need it -- only once,
        #    so re-running the cell does not pile up duplicate entries
        if FS_BIN not in os.environ.get("PATH", "").split(os.pathsep):
            os.environ["PATH"] = FS_BIN + os.pathsep + os.environ.get("PATH", "")
    else:
        print(f"[WARN] No existe la carpeta de FluidSynth: {FS_BIN}")

In [None]:
import pretty_midi as pm, soundfile as sf
from IPython.display import Audio, display

def multihot_to_pretty_midi(M, bpm=100, gm_map=None, velocity=100, note_len=0.06):
    """Convert one multi-hot pattern into a single-track drum PrettyMIDI object.

    The bar is assumed to be 4 beats long. It is split into ``T`` steps when
    ``T`` is 16, 32 or 64, and into 16 steps otherwise.

    Args:
        M: 2-D array of shape ``(T, I)``; a value of 1 marks a hit.
        bpm: Tempo in beats per minute.
        gm_map: Optional sequence mapping channel index to MIDI pitch.
            Channels without an entry (or all channels when ``None``) use
            pitch 36. Lists and NumPy arrays are both accepted.
        velocity: MIDI velocity given to every note.
        note_len: Duration of every note in seconds.

    Returns:
        A ``pretty_midi.PrettyMIDI`` holding one drum instrument.
    """
    T, _ = M.shape
    sec_per_beat = 60.0 / bpm
    bar_len = sec_per_beat * 4.0
    steps_per_bar = T if T in (16, 32, 64) else 16
    step_len = bar_len / steps_per_bar

    midi = pm.PrettyMIDI(initial_tempo=bpm)
    drum = pm.Instrument(program=0, is_drum=True)

    # `is not None` rather than truthiness: truth-testing a multi-element
    # ndarray raises ValueError
    n_mapped = len(gm_map) if gm_map is not None else 0

    for t in range(T):
        # Onset and release are shared by every hit in the step
        start = float(t * step_len)
        end = float(start + note_len)
        for j in np.where(M[t] == 1)[0]:
            pitch = gm_map[j] if j < n_mapped else 36
            drum.notes.append(
                pm.Note(velocity=int(velocity), pitch=int(pitch), start=start, end=end)
            )

    midi.instruments.append(drum)
    return midi

import pretty_midi as pm, soundfile as sf
from pathlib import Path

def write_wav_from_pretty_midi(pm_obj, sf2_path, out_wav, sr=44100):
    """Render a PrettyMIDI object to a WAV file using a SoundFont.

    First tries in-memory synthesis via ``pm_obj.fluidsynth`` and writes the
    samples with ``soundfile``. If that raises, a warning is printed and the
    MIDI is exported next to the target as a ``.mid`` file, then converted
    with ``midi2audio``. The intermediate ``.mid`` is left on disk so it can
    be inspected when debugging.

    Args:
        pm_obj: Object exposing ``fluidsynth(sf2_path=..., fs=...)`` and
            ``write(path)``.
        sf2_path: Path to the ``.sf2`` SoundFont.
        out_wav: Destination WAV path.
        sr: Sample rate in Hz, used by both rendering paths.

    Returns:
        ``out_wav`` as a ``pathlib.Path``.
    """
    out_wav = Path(out_wav)
    sf2_path = Path(sf2_path)
    try:
        # Option A: in-memory render (fast) through pyfluidsynth
        audio = pm_obj.fluidsynth(sf2_path=str(sf2_path), fs=sr)
        sf.write(str(out_wav), audio, sr)
        return out_wav
    except Exception as e:
        # Deliberately broad: any failure of option A triggers the fallback
        print("[WARN] pretty_midi.fluidsynth falló, uso fallback midi2audio. Detalle:", e)
        # Option B: export MIDI and convert with the fluidsynth binary via midi2audio
        mid_tmp = out_wav.with_suffix(".mid")
        pm_obj.write(str(mid_tmp))
        from midi2audio import FluidSynth
        # Forward sr so the fallback honours the requested sample rate
        fs = FluidSynth(sound_font=str(sf2_path), sample_rate=sr)
        fs.midi_to_audio(str(mid_tmp), str(out_wav))
        return out_wav

# Output folder: outputs/<today's date>/dataset1_sf2_demo under the project root
OUT_DIR = PROJECT_ROOT / "outputs" / time.strftime("%Y-%m-%d") / "dataset1_sf2_demo"
OUT_DIR.mkdir(parents=True, exist_ok=True)
print("OUT_DIR:", OUT_DIR)

BPM = 100
N_SAMPLES = 6
rng = np.random.default_rng(1234)  # fixed seed so the same bars are picked each run
# Clamp the sample count: choice(..., replace=False) raises when asked for
# more items than the dataset holds
idxs = rng.choice(BM.shape[0], size=min(N_SAMPLES, BM.shape[0]), replace=False)

wavs = []
for i in idxs:
    M = BM[i]  # (T,I)
    pm_obj = multihot_to_pretty_midi(M, bpm=BPM, gm_map=gm_per_index)
    wav_path = OUT_DIR / f"dataset1_bar{i:05d}_{BPM}bpm.wav"
    write_wav_from_pretty_midi(pm_obj, SF2_PATH, wav_path, sr=44100)
    wavs.append(wav_path)  # keep the rendered paths available after the loop
    print("Guardado:", wav_path.name)
    display(Audio(filename=str(wav_path)))
print("Listo.")


OUT_DIR: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm\outputs\2025-10-15\dataset1_sf2_demo
Guardado: dataset1_bar18346_100bpm.wav


Guardado: dataset1_bar03182_100bpm.wav


Guardado: dataset1_bar07060_100bpm.wav


Guardado: dataset1_bar17147_100bpm.wav


Guardado: dataset1_bar18136_100bpm.wav


Guardado: dataset1_bar18184_100bpm.wav


Listo.


: 