In [9]:
import os, sys
FS_BIN = r"C:\tools\fluidsynth\bin"  # ruta por defecto de chocolatey
if sys.platform == "win32":
    if hasattr(os, "add_dll_directory"):
        os.add_dll_directory(FS_BIN)
    os.environ["PATH"] = FS_BIN + ";" + os.environ.get("PATH", "")


In [10]:
import pathlib as P, json, numpy as np, time
from typing import Dict, Any, Optional, Tuple
from IPython.display import Audio, display

def find_project_root(start: P.Path) -> P.Path:
    start = P.Path(start).resolve()
    cur = start
    for _ in range(8):
        if (cur / "models" / "final").exists() and (cur / "data" / "processed").exists():
            return cur
        if cur.parent == cur:
            break
        cur = cur.parent
    parts = start.parts
    if len(parts) >= 2 and parts[-2:] == ("src","notebooks"):
        try:
            return start.parents[2]
        except IndexError:
            pass
    return start

CWD = P.Path.cwd()
PROJECT_ROOT = find_project_root(CWD)
print("PROJECT_ROOT:", PROJECT_ROOT)

# Primer modelo (dataset 1) – ajusta si tu archivo tiene otro nombre
MODELS_DIR = PROJECT_ROOT / "models" / "final"
MODEL_PATH = None
for cand in ["best.h5", "best_1.h5", "best_1.keras", "best.keras"]:
    p = MODELS_DIR / cand
    if p.exists():
        MODEL_PATH = p
        break
assert MODEL_PATH is not None, f"No encontré el modelo final del primer experimento en {MODELS_DIR}"
print("Usando modelo:", MODEL_PATH.name)

# Dataset 1 (processed)
DATASET_DIR = PROJECT_ROOT / "data" / "processed"
print("DATASET_DIR:", DATASET_DIR)

# SoundFont GM (EDITA si tu .sf2 está en otra ruta)
from pathlib import Path
SF2_PATH = Path(r"C:\SoundFonts\FluidR3_GM.sf2")
assert SF2_PATH.exists(), f"No encuentro SoundFont: {SF2_PATH}"
print("SF2_PATH:", SF2_PATH)

# Output
OUT_DIR = PROJECT_ROOT / "outputs" / time.strftime("%Y-%m-%d") / "model1_sf2_gen"
(OUT_DIR / "audio_wav").mkdir(parents=True, exist_ok=True)
print("OUT_DIR:", OUT_DIR)


PROJECT_ROOT: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm
Usando modelo: best.h5
DATASET_DIR: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm\data\processed
SF2_PATH: C:\SoundFonts\FluidR3_GM.sf2
OUT_DIR: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm\outputs\2025-10-15\model1_sf2_gen


In [11]:
import numpy as np

bm_path = DATASET_DIR / "train_bitmasks.npz"
with np.load(bm_path, allow_pickle=True) as z:
    bars = z["bars"]  # (N,T), dtype=object

def _to_list(x):
    try:
        arr = np.array(x)
        if arr.ndim >= 1 and np.all(np.isin(arr,[0,1])) and arr.size>1:
            return [int(i) for i,v in enumerate(arr.ravel()) if int(v)==1]
    except Exception:
        pass
    if isinstance(x,(list,tuple,np.ndarray)):
        vals=[]
        for v in np.array(x).ravel():
            try:
                iv=int(v)
                if iv!=0: vals.append(iv)
            except Exception:
                continue
        return vals
    try:
        v=int(x); return [v] if v!=0 else []
    except Exception:
        return []

def scan_vocab_from_bars(bars_obj):
    smp = bars_obj[0,0]
    try:
        arr = np.array(smp)
        if arr.ndim>=1 and np.all(np.isin(arr,[0,1])) and arr.size>1:
            return list(range(int(arr.size)))
    except Exception:
        pass
    inst=set(); N,T=bars_obj.shape
    for i in range(N):
        for t in range(T):
            for v in _to_list(bars_obj[i,t]):
                inst.add(int(v))
    return sorted(inst)

def bars_to_3d(bars_obj, vocab):
    idx={p:i for i,p in enumerate(vocab)}
    N,T=bars_obj.shape; I=len(vocab)
    M=np.zeros((N,T,I), dtype=np.int8)
    is_index = all(isinstance(p,(int,np.integer)) and p<32 for p in vocab)
    for i in range(N):
        for t in range(T):
            lst=_to_list(bars_obj[i,t])
            if is_index:
                for j in lst:
                    if 0<=j<I: M[i,t,j]=1
            else:
                for pitch in lst:
                    jj=idx.get(int(pitch), None)
                    if jj is not None: M[i,t,jj]=1
    return M

vocab_inst = scan_vocab_from_bars(bars)
BM = bars_to_3d(bars, vocab_inst)  # (N,T,I)
N,T,I = BM.shape
print("BM:", BM.shape, "vocab_inst:", vocab_inst[:10], "...")


BM: (18573, 16, 160) vocab_inst: [1, 10, 11, 100, 101, 110, 1000, 1001, 1010, 1100] ...


In [12]:
import tensorflow as tf

def safe_load_model(path: P.Path):
    # Compatible con .h5 y .keras
    custom_objects = {}
    return tf.keras.models.load_model(str(path), custom_objects=custom_objects, compile=False)

model = safe_load_model(MODEL_PATH)
print("Inputs:", [ (t.name, t.shape) for t in model.inputs ])
print("Outputs:", model.output_shape)

def infer_signature(model, T_hint:int=16):
    """
    Intenta mapear inputs por NOMBRE primero (pos, token, style). Si no es concluyente,
    usa fallback por SHAPE y orden: primer (None, T) -> tokens, segundo (None, T) -> pos,
    cualquier (None, S≠T) -> style.
    """
    sig = {"tokens": None, "pos": None, "style": None}
    names = [getattr(t, "name", "").lower() for t in model.inputs]
    # 1) por nombre
    for i, nm in enumerate(names):
        if any(k in nm for k in ["pos", "position"]):
            sig["pos"] = i
        elif any(k in nm for k in ["style", "genre", "cond"]):
            sig["style"] = i
    for i, nm in enumerate(names):
        if sig["tokens"] is None and any(k in nm for k in ["tok", "token", "x_tokens", "xtokens"]):
            sig["tokens"] = i

    # 2) fallback por shape
    if sig["tokens"] is None or (sig["pos"] is None and len(model.inputs) >= 2):
        # revisa shapes 2D con T_hint
        candidates = []
        for i, t in enumerate(model.inputs):
            shp = tuple(t.shape)
            if len(shp) == 2 and (shp[1] is None or (isinstance(shp[1], int) and shp[1] in (T_hint,))):
                candidates.append(i)
        # asigna tokens y pos si faltan
        if candidates:
            if sig["tokens"] is None:
                sig["tokens"] = candidates[0]
            if sig["pos"] is None and len(candidates) >= 2:
                # usa el siguiente distinto a tokens
                for c in candidates:
                    if c != sig["tokens"]:
                        sig["pos"] = c
                        break
        # style: (None,S!=T)
        for i, t in enumerate(model.inputs):
            if i in (sig["tokens"], sig["pos"]):
                continue
            shp = tuple(t.shape)
            if len(shp) == 2 and (shp[1] is not None) and (shp[1] != T_hint):
                sig["style"] = i

    # 3) último fallback: si el modelo tiene 2 inputs y solo detectamos uno, fuerza pos
    if len(model.inputs) == 2:
        # asegura dos índices distintos
        if sig["tokens"] is None:
            sig["tokens"] = 0
        if sig["pos"] is None:
            sig["pos"] = 1 if sig["tokens"] == 0 else 0

    return sig

sig = infer_signature(model, T_hint=T)
print("Firma inferida (robusta):", sig, "| n_inputs:", len(model.inputs))



Inputs: [('X_tokens', TensorShape([None, 16])), ('X_style', TensorShape([None, 6]))]
Outputs: (None, 16, 166)
Firma inferida (robusta): {'tokens': 0, 'pos': 1, 'style': 1} | n_inputs: 2


In [13]:
rng = np.random.default_rng(2025)

def top_k_sample(probs: np.ndarray, k:int=8, temperature:float=1.0, rest_ids=None) -> int:
    p = np.asarray(probs, dtype=np.float64)
    p = np.maximum(p, 1e-12)
    p = p ** (1.0/float(max(1e-4, temperature)))
    if rest_ids:
        p[list(rest_ids)] *= 0.75
    if k is not None and k < p.size:
        idx = np.argpartition(p, -k)[-k:]
        sub = p[idx]; sub = sub / (sub.sum() + 1e-12)
        return int(rng.choice(idx, p=sub))
    return int(rng.choice(np.arange(p.size), p=p/p.sum()))

def _build_feeds(model, sig, X, pos, style_vec):
    """Construye la lista de entradas en el ORDEN EXACTO que espera el modelo."""
    n = len(model.inputs)
    feeds = [None] * n

    # tokens
    if sig["tokens"] is not None:
        feeds[sig["tokens"]] = X
    else:
        # si no detectamos tokens, por seguridad asume 0
        feeds[0] = X

    # pos
    if sig["pos"] is not None:
        feeds[sig["pos"]] = pos
    else:
        # intenta hallar un hueco con shape compatible (None,T) para poner pos
        T_cur = X.shape[1]
        for i, t in enumerate(model.inputs):
            if feeds[i] is None:
                shp = tuple(t.shape)
                if len(shp) == 2 and (shp[1] is None or (isinstance(shp[1], int) and shp[1] in (T_cur,))):
                    feeds[i] = pos
                    sig["pos"] = i
                    break

    # style
    if sig["style"] is not None:
        S = int(model.inputs[sig["style"]].shape[-1])
        Z = np.zeros((1, S), dtype=np.float32) if style_vec is None else np.asarray(style_vec, dtype=np.float32)[None, :]
        feeds[sig["style"]] = Z

    # rellena cualquier None restante con tensores dummy seguros (evita fallo por #inputs)
    for i, t in enumerate(model.inputs):
        if feeds[i] is None:
            shp = tuple(t.shape)
            if len(shp) == 2:
                L = 1 if shp[1] is None else int(shp[1])
                L = L if L > 0 else X.shape[1]
                feeds[i] = np.zeros((1, L), dtype=np.int32)
            else:
                feeds[i] = np.zeros((1,), dtype=np.float32)
    return feeds

def generate_bar(model, T:int, top_k:int=8, temperature:float=1.0, style_vec=None, rest_ids=None):
    X = np.zeros((1, T), dtype=np.int32)
    pos = np.arange(1, T+1, dtype=np.int32)[None, :]
    feeds = _build_feeds(model, sig, X, pos, style_vec)

    for t in range(T):
        y = model(feeds, training=False).numpy()  # (1,T,V)
        V = y.shape[-1]
        tok = top_k_sample(y[0, t, :], k=top_k, temperature=temperature, rest_ids=rest_ids)
        # actualiza el paso t
        feeds[sig["tokens"]][0, t] = min(tok, V-1)

    return feeds[sig["tokens"]][0].copy()


In [14]:
from collections import defaultdict

def estimate_id2bitmask_from_BM(BM, vocab_cap:Optional[int]=None):
    I = BM.shape[-1]
    dominant = int(np.argmax(BM.sum(axis=(0,1))))
    default_mask = np.zeros(I, dtype=np.int32); default_mask[dominant]=1
    return defaultdict(lambda: default_mask.copy())

id2bit = estimate_id2bitmask_from_BM(BM)

def tokens_to_multihot(seq_ids: np.ndarray, id2bitmask, I:int) -> np.ndarray:
    T = int(seq_ids.shape[0])
    M = np.zeros((T, I), dtype=np.int32)
    for t in range(T):
        bm = id2bitmask[int(seq_ids[t])]
        i = min(I, bm.shape[0])
        M[t, :i] = bm[:i]
    return M


In [15]:
import pretty_midi as pm, soundfile as sf
from pathlib import Path

def infer_roles_from_BM(BM):
    N,T,I = BM.shape
    density = BM.mean(axis=(0,1))
    beats = [0,4,8,12]; backbeats = [4,12]
    onbeat = BM[:,beats,:].sum(axis=(0,1))
    back   = BM[:,backbeats,:].sum(axis=(0,1))
    hh_idx = int(np.argmax(density))
    mask = np.ones(I,bool); mask[hh_idx]=False
    sn_idx = int(np.argmax(np.where(mask, back, -1)))
    mask[sn_idx]=False
    kd_idx = int(np.argmax(np.where(mask, onbeat, -1)))
    return kd_idx, sn_idx, hh_idx

kd_idx, sn_idx, hh_idx = infer_roles_from_BM(BM)
rota=[41,43,45,47,48,51,49,37,46,57,59,54]
gm_per_index=[36]*I
if 0<=sn_idx<I: gm_per_index[sn_idx]=38
if 0<=hh_idx<I: gm_per_index[hh_idx]=42
rest=[j for j in range(I) if j not in (kd_idx,sn_idx,hh_idx)]
for k,j in enumerate(rest): gm_per_index[j]=rota[k%len(rota)]

def multihot_to_pretty_midi(M, bpm=100, gm_map=None):
    T,I = M.shape
    spb = 60.0/bpm
    bar = spb*4.0
    steps = T if (T in (16,32,64)) else 16
    step = bar/steps
    midi = pm.PrettyMIDI(initial_tempo=bpm)
    drum = pm.Instrument(program=0, is_drum=True)
    for t in range(T):
        hits = np.where(M[t]==1)[0]
        for j in hits:
            pitch = gm_map[j] if (gm_map and j<len(gm_map)) else 36
            start = float(t*step); end = float(start+0.10)
            drum.notes.append(pm.Note(velocity=120, pitch=int(pitch), start=start, end=end))
    midi.instruments.append(drum)
    return midi

def write_wav_from_pretty_midi(pm_obj, sf2_path, out_wav, sr=44100):
    out_wav = Path(out_wav); sf2_path = Path(sf2_path)
    try:
        audio = pm_obj.fluidsynth(sf2_path=str(sf2_path), fs=sr)
        sf.write(str(out_wav), audio, sr)
        return out_wav
    except Exception as e:
        print("[WARN] pretty_midi.fluidsynth falló, uso fallback midi2audio. Detalle:", e)
        mid_tmp = out_wav.with_suffix(".mid")
        pm_obj.write(str(mid_tmp))
        from midi2audio import FluidSynth
        FluidSynth(sound_font=str(sf2_path)).midi_to_audio(str(mid_tmp), str(out_wav))
        return out_wav


In [16]:
GENRES = ["jazz", "bossa", "samba", "hiphop", "afrocubano", "choro"]
NUM_SAMPLES_PER_GENRE = 3
BARS_PER_SAMPLE = 4
STEPS_PER_BAR = int(T)
TEMPERATURE = 1.0
TOP_K = 8
REST_IDS = {0}
BPM = 100

wav_dir = OUT_DIR / "audio_wav"
wav_dir.mkdir(exist_ok=True, parents=True)

def style_vector_for(name:str, S:int) -> np.ndarray:
    idx = {"jazz":0,"bossa":1,"samba":2,"hiphop":3,"afrocubano":4,"choro":5}.get(name, 0)
    z = np.zeros((S,), dtype=np.float32)
    if 0<=idx<S: z[idx]=1.0
    return z

samples_meta = []
for g in GENRES:
    kept = 0; attempts = 0
    while kept < NUM_SAMPLES_PER_GENRE and attempts < NUM_SAMPLES_PER_GENRE*8:
        attempts += 1
        z = None
        if sig["style"] is not None:
            S = int(model.inputs[sig["style"]].shape[-1])
            z = style_vector_for(g, S)
        bars_tokens = []
        for _ in range(BARS_PER_SAMPLE):
            seq = generate_bar(model, T=STEPS_PER_BAR, top_k=TOP_K, temperature=TEMPERATURE,
                               style_vec=z, rest_ids=REST_IDS)
            bars_tokens.append(seq)
        toks = np.concatenate(bars_tokens, axis=0)
        M = tokens_to_multihot(toks, id2bit, I=I)
        pm_obj = multihot_to_pretty_midi(M, bpm=BPM, gm_map=gm_per_index)
        fname = f"{g}_sample{kept+1:02d}_{BARS_PER_SAMPLE}bars_{BPM}bpm.wav"
        fpath = (wav_dir / fname)
        write_wav_from_pretty_midi(pm_obj, SF2_PATH, fpath, sr=44100)
        kept += 1
        samples_meta.append({"genre": g, "file": str(fpath)})
        print("✓", fname)
        display(Audio(filename=str(fpath)))
print("Resumen:", samples_meta[:3], "... total=", len(samples_meta))


✓ jazz_sample01_4bars_100bpm.wav


✓ jazz_sample02_4bars_100bpm.wav


✓ jazz_sample03_4bars_100bpm.wav


✓ bossa_sample01_4bars_100bpm.wav


✓ bossa_sample02_4bars_100bpm.wav


✓ bossa_sample03_4bars_100bpm.wav


✓ samba_sample01_4bars_100bpm.wav


✓ samba_sample02_4bars_100bpm.wav


✓ samba_sample03_4bars_100bpm.wav


✓ hiphop_sample01_4bars_100bpm.wav


✓ hiphop_sample02_4bars_100bpm.wav


✓ hiphop_sample03_4bars_100bpm.wav


✓ afrocubano_sample01_4bars_100bpm.wav


✓ afrocubano_sample02_4bars_100bpm.wav


✓ afrocubano_sample03_4bars_100bpm.wav


✓ choro_sample01_4bars_100bpm.wav


✓ choro_sample02_4bars_100bpm.wav


✓ choro_sample03_4bars_100bpm.wav


Resumen: [{'genre': 'jazz', 'file': 'C:\\Users\\Djcho\\Documents\\Repositories\\Polyrhythmia\\model_lstm\\outputs\\2025-10-15\\model1_sf2_gen\\audio_wav\\jazz_sample01_4bars_100bpm.wav'}, {'genre': 'jazz', 'file': 'C:\\Users\\Djcho\\Documents\\Repositories\\Polyrhythmia\\model_lstm\\outputs\\2025-10-15\\model1_sf2_gen\\audio_wav\\jazz_sample02_4bars_100bpm.wav'}, {'genre': 'jazz', 'file': 'C:\\Users\\Djcho\\Documents\\Repositories\\Polyrhythmia\\model_lstm\\outputs\\2025-10-15\\model1_sf2_gen\\audio_wav\\jazz_sample03_4bars_100bpm.wav'}] ... total= 18


In [17]:
import numpy as np, zipfile

act = BM.sum(axis=(0,1))
print("Activaciones por canal (dataset1):", act.tolist())

zip_path = OUT_DIR / "model1_audio_wav.zip"
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    for p in (OUT_DIR / "audio_wav").glob("*.wav"):
        zf.write(p, arcname=p.name)
print("ZIP listo:", zip_path)


Activaciones por canal (dataset1): [13388, 712, 35, 4822, 953, 114, 1559, 135, 1, 74, 2696, 159, 4, 195, 30, 744, 18, 10, 66, 1, 1, 4, 1, 6, 17660, 6884, 458, 11, 1380, 361, 38, 633, 21, 21, 900, 74, 48, 11, 1, 248, 185, 65, 2, 24, 88, 9, 5, 2, 11, 47081, 4486, 408, 2, 312, 3, 1, 211, 3, 714, 6, 1, 306, 2, 2, 1, 12306, 3603, 150, 1, 262, 2, 1, 102, 6, 274, 1, 129, 13, 6, 8, 21229, 7830, 1029, 107, 1369, 134, 52, 508, 86, 9, 1, 18, 1, 1, 1201, 125, 29, 6, 64, 2, 1, 21, 1, 2, 462, 6, 6, 26, 5, 7560, 2345, 231, 28, 279, 100, 18, 219, 8, 4, 15, 273, 41, 10, 5, 2, 244, 49, 21, 2, 7, 10, 10672, 1715, 342, 12, 94, 3, 1, 116, 2, 255, 6, 3, 1, 118, 1, 3, 3775, 1098, 167, 157, 79, 1, 115, 2, 1, 109, 61, 2, 1]
ZIP listo: C:\Users\Djcho\Documents\Repositories\Polyrhythmia\model_lstm\outputs\2025-10-15\model1_sf2_gen\model1_audio_wav.zip
