In [4]:
# ViSoND: Build MIDI matrix from Kilosort with rate filters + RPV filter + (optional) similarity ordering

import io, os, numpy as np, pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, FileLink

# ---------- UI ----------
# Input sources: each array can come from a browser upload or from a path typed
# below; the loader tries the upload first and falls back to the path.
times_upload   = widgets.FileUpload(accept='.npy', multiple=False, description='spike_times.npy')
clust_upload   = widgets.FileUpload(accept='.npy', multiple=False, description='spike_clusters.npy')

times_path = widgets.Text(description='Times path', placeholder='spike_times.npy', layout=widgets.Layout(width='48%'))
clust_path = widgets.Text(description='Clusters path', placeholder='spike_clusters.npy', layout=widgets.Layout(width='48%'))

# Time base: when the checkbox is ticked, spike times are divided by the sample
# rate to obtain seconds; otherwise they are used as seconds directly.
sr = widgets.IntText(value=30000, description='Sample rate (Hz)')
times_in_samples = widgets.Checkbox(value=True, description='Times are in samples')

# Per-note MIDI fields written into every row of the output matrix.
spike_dur = widgets.FloatText(value=0.01, description='Spike duration (s)')
vel = widgets.IntSlider(value=100, min=1, max=127, description='Velocity')
track_id = widgets.IntText(value=0, description='Track ID')
channel  = widgets.IntSlider(value=0, min=0, max=15, description='MIDI Channel')

# Cluster filters, applied in this order by _build_matrix: firing-rate bounds,
# refractory-period-violation (RPV) fraction, then top-K by spike count.
# A max rate of 0 and a top-K of 0 both mean "filter disabled".
min_rate_hz = widgets.FloatText(value=0.1, description='Min rate Hz')
max_rate_hz = widgets.FloatText(value=0.0, description='Max rate Hz (0=off)')
rpv_ms      = widgets.FloatText(value=2.0, description='Refractory (ms)')
max_rpv_frac= widgets.FloatText(value=0.05, description='Max RPV frac')
top_k_clusters = widgets.IntText(value=0, description='Keep top-K (0=all)')

# Strategy used to assign note ids to clusters; the option text is matched by
# substring in _order_clusters, so the labels are part of the behavior.
order_dd = widgets.Dropdown(
    options=[
        'first appearance',
        'rate high→low (most active = lowest)',
        'rate low→high (most active = highest)',
        'cluster id ascending',
        'cluster id descending',
        'similarity: cosine',
        'similarity: EMD',
    ],
    value='similarity: cosine',
    description='Note order',
    layout=widgets.Layout(width='320px')
)
# Histogram bin width used only by the two similarity orderings.
bin_s = widgets.FloatText(value=0.100, description='Bin (s) for similarity')

build_btn = widgets.Button(description='Build MIDI Matrix', button_style='success')
# All log text, previews and download links are rendered inside this widget.
out = widgets.Output()

# Lay the controls out top-to-bottom and show them in the cell output.
display(widgets.VBox([
    widgets.HTML("<b>Load Kilosort files</b>"),
    widgets.HBox([times_upload, clust_upload]),
    widgets.HBox([times_path, clust_path]),
    widgets.HBox([sr, times_in_samples]),
    widgets.HBox([spike_dur, vel]),
    widgets.HBox([track_id, channel]),
    widgets.HBox([min_rate_hz, max_rate_hz, top_k_clusters]),
    widgets.HBox([rpv_ms, max_rpv_frac]),
    widgets.HBox([order_dd, bin_s]),
    build_btn,
    widgets.HTML("<b>Output</b>"),
    out
]))

# ---------- Helpers ----------
def _get_upload_bytes(upl):
    """Return ``(name, content)`` for the first file held by an upload widget.

    Handles both value layouts: a ``{name: info}`` dict (ipywidgets v7) and a
    sequence of file records (ipywidgets v8). Returns ``(None, None)`` when
    nothing has been uploaded.
    """
    if not upl.value:
        return None, None

    value = upl.value
    if isinstance(value, dict):  # ipywidgets v7
        filename, record = next(iter(value.items()))
        return filename, record['content']

    # ipywidgets v8: records may expose fields as attributes or as mapping keys.
    first = value[0]
    filename = getattr(first, 'name', None) or first.get('name')
    payload = getattr(first, 'content', None) or first.get('content')
    return filename, payload

def _load_npy_from_upload_or_path(upl, path_text, label):
    """Load a ``.npy`` array from an upload widget, falling back to a typed path.

    Args:
        upl: FileUpload widget; used when it holds non-empty content.
        path_text: Text widget whose ``value`` is a filesystem path, used only
            when nothing was uploaded.
        label: Human-readable file label used in error messages.

    Returns:
        ``(array, source)`` where ``source`` is the uploaded file name or the path.

    Raises:
        ValueError: if neither source is provided, the path does not exist, or
            the data cannot be parsed as a non-pickled ``.npy`` file. The
            original exception is chained as ``__cause__``.
    """
    name, content = _get_upload_bytes(upl)
    if content:
        try:
            return np.load(io.BytesIO(content), allow_pickle=False), name
        except Exception as e:
            raise ValueError(f"Failed to load {label} from upload: {e}") from e

    p = (path_text.value or "").strip()
    if not p:
        raise ValueError(f"No {label} uploaded or path provided.")
    if not os.path.exists(p):
        raise ValueError(f"{label} path not found: {p}")
    # Mirror the upload branch so a corrupt or pickled file on disk is reported
    # with the same kind of error instead of a raw numpy/OS exception.
    try:
        return np.load(p, allow_pickle=False), p
    except Exception as e:
        raise ValueError(f"Failed to load {label} from path '{p}': {e}") from e

def _binned_matrix(times_s, cluster_ids, bin_seconds):
    """Histogram each cluster's spike times onto a shared set of time bins.

    The bins span ``[times_s.min(), times_s.max()]``; their number is the span
    divided by ``bin_seconds`` rounded up (at least one bin).

    Returns:
        ``(labels, bin_edges, counts)`` where ``labels`` are the sorted unique
        cluster ids and ``counts`` is a float32 array of shape
        ``(len(labels), n_bins)``.
    """
    start = float(times_s.min())
    stop = float(times_s.max())
    span = max(1e-9, stop - start)
    width = max(1e-6, float(bin_seconds))
    n_bins = max(1, int(np.ceil(span / width)))
    bin_edges = np.linspace(start, stop, n_bins + 1, endpoint=True)

    labels = np.unique(cluster_ids)
    counts = np.zeros((labels.size, n_bins), dtype=np.float32)
    # Iterating a 2-D array yields row views, so writing into `row` fills `counts`.
    for row, label in zip(counts, labels):
        row[:] = np.histogram(times_s[cluster_ids == label], bins=bin_edges)[0]
    return labels, bin_edges, counts

def _cosine_distance_matrix(X):
    """Return the pairwise cosine distance (1 - cosine similarity) between rows of X.

    All-zero rows are left unnormalized, so they are at distance 1 from every
    other row. The diagonal is forced to exactly 0.
    """
    rows = X.astype(np.float64)
    lengths = np.linalg.norm(rows, axis=1, keepdims=True)
    unit = rows / np.where(lengths == 0, 1.0, lengths)
    # Clip guards against rounding pushing a similarity slightly outside [-1, 1].
    dist = 1.0 - np.clip(unit @ unit.T, -1.0, 1.0)
    np.fill_diagonal(dist, 0.0)
    return dist

def _emd_distance_matrix(X, bin_edges):
    """Return pairwise 1-D earth mover's distances between the rows of X.

    Each row is normalized to sum to 1 (all-zero rows stay zero). The distance
    between two rows is the summed absolute difference of their cumulative
    sums, scaled by the mean bin width taken from ``bin_edges``.
    """
    counts = X.astype(np.float64)
    totals = counts.sum(axis=1, keepdims=True)
    totals[totals == 0] = 1.0
    cdf = np.cumsum(counts / totals, axis=1)

    widths = np.diff(bin_edges)
    scale = float(np.mean(widths)) if widths.size else 1.0

    n_rows = cdf.shape[0]
    dist = np.zeros((n_rows, n_rows), dtype=np.float64)
    # One row at a time keeps the temporary at (n_rows, n_bins) rather than
    # materializing an (n_rows, n_rows, n_bins) array.
    for i, row in enumerate(cdf):
        dist[i, :] = scale * np.abs(row[None, :] - cdf).sum(axis=1)
    np.fill_diagonal(dist, 0.0)
    return dist

def _order_by_similarity(times_s, cluster_ids, bin_seconds, method='cosine'):
    """Order clusters along a 1-D embedding of their pairwise distances.

    Spike times are binned per cluster, a distance matrix is built (cosine when
    ``method == 'cosine'``, EMD otherwise), and clusters are sorted by their
    coordinate on the leading axis of a classical MDS of that matrix.

    Returns:
        List of cluster ids in embedding order.
    """
    labels, bin_edges, counts = _binned_matrix(times_s, cluster_ids, bin_seconds)
    if method == 'cosine':
        dist = _cosine_distance_matrix(counts)
    else:
        dist = _emd_distance_matrix(counts, bin_edges)

    # 1-D classical MDS: double-center the squared distances, then project on
    # the eigenvector with the largest eigenvalue.
    n = dist.shape[0]
    centering = np.eye(n) - np.ones(dist.shape) / n
    gram = -0.5 * centering @ (dist**2) @ centering
    try:
        eigvals, eigvecs = np.linalg.eigh(gram)
        top = np.argsort(eigvals)[::-1][0]
        coord = eigvecs[:, top] * np.sqrt(max(eigvals[top], 0))
    except Exception:
        # Best-effort fallback: rank by (negated) mean distance to all others.
        coord = -dist.mean(axis=1)
    return labels[np.argsort(coord)].tolist()

def _order_clusters(times_s, cluster_ids, strategy, bin_seconds=0.1):
    """Return the unique cluster ids in the order that defines their note ids.

    Args:
        times_s: Spike times in seconds, aligned with ``cluster_ids``. Only the
            similarity strategies read it.
        cluster_ids: Cluster id of every spike.
        strategy: Label from the "Note order" dropdown, matched
            case-insensitively by substring. Unrecognized or empty labels fall
            back to first-appearance order.
        bin_seconds: Bin width forwarded to the similarity orderings.

    Returns:
        List of cluster ids as Python scalars; position in the list is the
        note id. Empty input yields an empty list.
    """
    ids = np.asarray(cluster_ids)
    if ids.size == 0:
        # Nothing to order; the similarity path would otherwise fail taking
        # min()/max() of an empty time array.
        return []

    strat = (strategy or '').lower()
    if 'similarity: cosine' in strat:
        return _order_by_similarity(times_s, cluster_ids, bin_seconds, method='cosine')
    if 'similarity: emd' in strat:
        return _order_by_similarity(times_s, cluster_ids, bin_seconds, method='emd')

    uniq, first_idx, counts = np.unique(ids, return_index=True, return_counts=True)
    # Every cluster shares the same recording duration, so ranking by spike
    # count is identical to ranking by rate. Ties are broken by ascending id
    # (lexsort's last key is the primary key).
    if 'rate high' in strat:
        return uniq[np.lexsort((uniq, -counts))].tolist()
    if 'rate low' in strat:
        return uniq[np.lexsort((uniq, counts))].tolist()
    if 'cluster id ascending' in strat:
        return uniq.tolist()
    if 'cluster id descending' in strat:
        return uniq[::-1].tolist()
    # Default: order of first appearance in the spike stream.
    return uniq[np.argsort(first_idx, kind='stable')].tolist()

def _compute_rpv_per_cluster(times_s, cluster_ids, refractory_s):
    """
    Refractory-period-violation (RPV) fraction for every cluster.

    Spikes are sorted by (cluster, time); inter-spike intervals (ISIs) are the
    consecutive differences that stay inside one cluster. For each cluster
    RPV = (# ISIs < refractory_s) / (# ISIs); a cluster with fewer than two
    spikes has no ISIs and gets 0.0.

    Returns dict: int cluster_id -> rpv_fraction (float)
    """
    # lexsort treats its LAST key as primary: cluster first, then time.
    by_cluster_then_time = np.lexsort((times_s, cluster_ids))
    t = times_s[by_cluster_then_time]
    c = cluster_ids[by_cluster_then_time]

    # Keep only differences whose two spikes belong to the same cluster.
    within = c[1:] == c[:-1]
    gaps = np.diff(t)[within]
    owners = c[1:][within]

    def _tally(labels):
        ids, n = np.unique(labels, return_counts=True)
        return dict(zip(ids.astype(int), n.astype(int)))

    totals = _tally(owners)
    violations = _tally(owners[gaps < refractory_s])

    fractions = {}
    for cid in np.unique(cluster_ids):
        key = int(cid)
        n_isi = totals.get(key, 0)
        fractions[key] = float(violations.get(key, 0)) / float(n_isi) if n_isi > 0 else 0.0
    return fractions

def _build_matrix(times, clusters,
                  sr_hz, times_are_samples,
                  spike_dur_s, velocity, track, chan,
                  min_rate, max_rate, rpv_window_ms, max_rpv, top_k, note_order, bin_seconds):
    """Filter clusters and turn the surviving spikes into a MIDI note matrix.

    Pipeline: convert times to seconds -> keep clusters within the rate bounds
    -> keep clusters whose RPV fraction is <= ``max_rpv`` -> optionally keep the
    ``top_k`` clusters by spike count -> sort spikes by time -> assign note ids
    according to ``note_order``.

    Args:
        times, clusters: Per-spike times and cluster ids; 1-D after squeezing
            singleton axes, and of equal length.
        sr_hz: Sample rate; used only when ``times_are_samples`` is true.
        times_are_samples: Divide ``times`` by ``sr_hz`` when true.
        spike_dur_s: Note length added to every start time.
        velocity: MIDI velocity, clipped to 1..127.
        track, chan: Values written to the track and channel columns.
        min_rate, max_rate: Rate bounds in Hz; ``max_rate`` of 0 disables the
            upper bound.
        rpv_window_ms, max_rpv: Refractory window and maximum allowed
            violation fraction.
        top_k: Keep this many highest-count clusters; 0 keeps all.
        note_order: Ordering strategy label passed to ``_order_clusters``.
        bin_seconds: Bin width for similarity orderings.

    Returns:
        ``(M, summary, ordered_clusters)``: ``M`` is an object array with
        columns [track, channel, note_id, velocity, start_s, end_s] sorted by
        start time; ``summary`` is a dict of counts and settings;
        ``ordered_clusters[i]`` is the cluster mapped to note id ``i``.

    Raises:
        ValueError: on non-1-D inputs, length mismatch, or a non-positive
            sample rate when converting from samples.
    """
    # Basic checks. squeeze() drops singleton axes such as an (N, 1) layout;
    # atleast_1d keeps a single-spike input 1-D instead of collapsing it to 0-d.
    times = np.atleast_1d(np.asarray(times).squeeze())
    clusters = np.atleast_1d(np.asarray(clusters).squeeze())
    if times.ndim != 1 or clusters.ndim != 1:
        raise ValueError("Inputs must be 1D arrays.")
    if times.shape[0] != clusters.shape[0]:
        raise ValueError(f"Length mismatch: spike_times ({times.shape[0]}) vs spike_clusters ({clusters.shape[0]}).")

    # Convert times to seconds if needed
    if times_are_samples:
        if sr_hz <= 0:
            raise ValueError("Sample rate must be positive when converting samples to seconds.")
        t_sec = times.astype(np.float64) / float(sr_hz)
    else:
        t_sec = times.astype(np.float64)

    # Recording duration (first to last spike); fall back to 1 s and flag it
    # when it cannot be estimated (no spikes, or all at the same instant).
    rec_dur = float(np.max(t_sec) - np.min(t_sec)) if t_sec.size else 0.0
    rec_warn = rec_dur <= 0
    if rec_warn:
        rec_dur = 1.0

    # Global per-cluster stats
    uniq_all, counts_all = np.unique(clusters, return_counts=True)
    rates_all = counts_all / rec_dur

    # --- (1) Min/Max rate filters ---
    keep = rates_all >= float(min_rate)
    if max_rate and float(max_rate) > 0:
        keep &= (rates_all <= float(max_rate))
    clusters_rate_kept = uniq_all[keep]
    counts_rate_kept = counts_all[keep]

    # Apply rate mask to spikes for RPV computation
    mask_rate_spikes = np.isin(clusters, clusters_rate_kept)
    t_rate = t_sec[mask_rate_spikes]
    c_rate = clusters[mask_rate_spikes]

    # --- (2) RPV filter ---
    refr_s = float(rpv_window_ms) / 1000.0
    rpv_dict = _compute_rpv_per_cluster(t_rate, c_rate, refr_s) if t_rate.size else {}
    rpv_ok = np.array(
        [rpv_dict.get(int(c), 0.0) <= float(max_rpv) for c in clusters_rate_kept],
        dtype=bool,
    )
    clusters_rpv_kept = clusters_rate_kept[rpv_ok]
    # Spike counts are unaffected by cluster-level filtering, so the counts
    # from np.unique above can be reused rather than recomputed.
    counts_rpv_kept = counts_rate_kept[rpv_ok]

    # --- (3) Top-K by count ---
    kept_for_topk = clusters_rpv_kept
    k = int(top_k) if top_k else 0
    if 0 < k < kept_for_topk.size:
        # Stable sort makes ties deterministic: the lower cluster id wins.
        order_desc = np.argsort(-counts_rpv_kept, kind='stable')
        kept_for_topk = kept_for_topk[order_desc[:k]]

    # Final spike mask
    final_mask = np.isin(clusters, kept_for_topk)
    t_final = t_sec[final_mask]
    c_final = clusters[final_mask]

    # Sort spikes by time (stable)
    order = np.argsort(t_final, kind='mergesort')
    t_final = t_final[order]
    c_final = c_final[order]

    # --- Ordering for note_id on kept spikes ---
    if c_final.size:
        ordered_clusters = _order_clusters(t_final, c_final, note_order, bin_seconds=bin_seconds)
        # Vectorized cluster -> rank lookup: locate each spike's cluster in the
        # sorted view of ordered_clusters, then map back to its list position.
        ordered_arr = np.asarray(ordered_clusters)
        sorter = np.argsort(ordered_arr, kind='stable')
        note_ids = sorter[np.searchsorted(ordered_arr, c_final, sorter=sorter)]
    else:
        # Every cluster was filtered out (or the input was empty): return an
        # empty matrix instead of failing inside the ordering helpers.
        ordered_clusters = []
        note_ids = np.zeros(0, dtype=int)

    # Build matrix [track, channel, note_id, velocity, start_s, end_s]
    n = t_final.shape[0]
    M = np.empty((n, 6), dtype=object)
    M[:, 0] = int(track)
    M[:, 1] = int(chan)
    M[:, 2] = note_ids
    M[:, 3] = int(np.clip(velocity, 1, 127))
    M[:, 4] = t_final
    M[:, 5] = t_final + float(spike_dur_s)

    # Summaries
    n_after_rate = int(clusters_rate_kept.size)
    n_after_rpv  = int(clusters_rpv_kept.size)
    n_after_topk = int(len(np.unique(kept_for_topk)))
    summary = {
        "clusters_total":          int(len(uniq_all)),
        "clusters_after_rate":     n_after_rate,
        "clusters_after_rpv":      n_after_rpv,
        "clusters_after_topk":     n_after_topk,
        "spikes_total":            int(times.shape[0]),
        "spikes_after_filters":    int(n),
        "recording_duration_s":    float(rec_dur),
        "rec_duration_warning":    rec_warn,
        "ordering":                note_order,
        "bin_seconds":             float(bin_seconds),
        "refractory_ms":           float(rpv_window_ms),
        "max_rpv_frac":            float(max_rpv)
    }
    # Per-cluster RPV (only for kept-after-rate clusters)
    summary["rpv_examples"] = {int(c): round(float(rpv_dict.get(int(c), 0.0)), 4) for c in clusters_rate_kept[:10]}
    return M, summary, ordered_clusters

def _preview_matrix(M, k=10):
    """Show the first ``k`` rows of the note matrix and return it as a DataFrame."""
    column_names = ["track", "channel", "note_id", "velocity", "start_s", "end_s"]
    table = pd.DataFrame(M, columns=column_names)
    display(table.head(k))
    return table

def _save_outputs(M, outstem="kilo_visond"):
    """Write the note matrix to ``<outstem>.npy`` and ``<outstem>.csv``.

    The CSV gets a header row with the six column names and no index column.

    Returns:
        ``(npy_path, csv_path)`` as strings.
    """
    npy_path, csv_path = (f"{outstem}{ext}" for ext in (".npy", ".csv"))
    np.save(npy_path, M)
    table = pd.DataFrame(
        M, columns=["track", "channel", "note_id", "velocity", "start_s", "end_s"]
    )
    table.to_csv(csv_path, index=False)
    return npy_path, csv_path

def _build_clicked(_):
    """Button handler: load both arrays, build the matrix, report and save it.

    Everything printed or displayed goes to the ``out`` widget, which is
    cleared first. Any exception is caught and shown as a traceback there
    rather than propagating out of the callback.
    """
    out.clear_output()
    with out:
        try:
            times, t_name = _load_npy_from_upload_or_path(times_upload, times_path, "spike_times.npy")
            clust, c_name = _load_npy_from_upload_or_path(clust_upload, clust_path, "spike_clusters.npy")
            print(f"Loaded times:    {t_name} shape={times.shape}")
            print(f"Loaded clusters: {c_name} shape={clust.shape}")

            # Snapshot the widget values once, then hand them over by keyword.
            settings = dict(
                sr_hz=sr.value,
                times_are_samples=times_in_samples.value,
                spike_dur_s=spike_dur.value,
                velocity=vel.value,
                track=track_id.value,
                chan=channel.value,
                min_rate=min_rate_hz.value,
                max_rate=max_rate_hz.value,
                rpv_window_ms=rpv_ms.value,
                max_rpv=max_rpv_frac.value,
                top_k=top_k_clusters.value,
                note_order=order_dd.value,
                bin_seconds=bin_s.value,
            )
            M, info, ordered_clusters = _build_matrix(times, clust, **settings)

            estimated_tag = " [estimated]" if info['rec_duration_warning'] else ""
            print(f"\nRecording duration: {info['recording_duration_s']:.3f} s" + estimated_tag)
            print(f"Clusters: total → after rate → after RPV → after topK : "
                  f"{info['clusters_total']} → {info['clusters_after_rate']} → {info['clusters_after_rpv']} → {info['clusters_after_topk']}")
            print(f"Spikes:   total → after filters: {info['spikes_total']} → {info['spikes_after_filters']}")
            print(f"RPV: window={info['refractory_ms']} ms, max frac={info['max_rpv_frac']}")

            rpv_examples = info.get("rpv_examples")
            if rpv_examples:
                print("Sample RPV (first 10 clusters after rate filter):")
                for cid, r in rpv_examples.items():
                    print(f"  cluster {cid}: RPV={r}")

            print(f"Ordering: {info['ordering']}  |  Bin: {info['bin_seconds']:.3f} s")
            print("First 12 cluster→note_id mappings:")
            note_of_cluster = {cid: i for i, cid in enumerate(ordered_clusters)}
            for cid, nid in list(note_of_cluster.items())[:12]:
                print(f"  cluster {cid} -> note_id {nid}")

            _preview_matrix(M, k=10)
            npy_path, csv_path = _save_outputs(M, outstem="kilo_visond")
            print("\nSaved:")
            for saved_path in (npy_path, csv_path):
                display(FileLink(saved_path))
            print("\nUse this .npy or .csv as input to the ViSoND render cell.")
        except Exception:
            import traceback
            traceback.print_exc()

# Run _build_clicked whenever the "Build MIDI Matrix" button is pressed.
build_btn.on_click(_build_clicked)

VBox(children=(HTML(value='<b>Load Kilosort files</b>'), HBox(children=(FileUpload(value=(), accept='.npy', de…

In [1]:
# ViSoND one-shot UI — upload OR path + options + STRICT unique scale-ladder mapping + render
# (Pitch ordering removed.)

import io, numpy as np, pandas as pd, mido
import ipywidgets as widgets
from mido import MetaMessage
from IPython.display import display, HTML, FileLink

# ---------------- UI ----------------
# Input: either an uploaded .csv/.npy or a path typed below; the upload is
# tried first by render_midi.
upload = widgets.FileUpload(accept='.csv,.npy', multiple=False)
path_txt = widgets.Text(
    description='Path',
    placeholder='e.g., bigdata.npy (already in this folder)',
    layout=widgets.Layout(width='50%')
)
# Timing: BPM sets the tempo meta event; Ticks/QN is the MIDI file resolution.
# Together they determine how seconds are converted to ticks.
bpm_slider = widgets.IntSlider(value=120, min=20, max=960, description='BPM')
tpb_slider = widgets.IntSlider(value=5000, min=16, max=10000, description='Ticks/QN')
# Pitch mapping: root note and scale. The scale list starts empty here and is
# filled from SCALE_INTERVALS once that table is defined further down.
root_dd = widgets.Dropdown(options=['C','C#','D','D#','E','F','F#','G','G#','A','A#','B'],
                           value='C', description='Root')
scale_dd = widgets.Dropdown(options=[], value=None, description='Scale', layout=widgets.Layout(width='300px'))
render_btn = widgets.Button(description='Render MIDI', button_style='success')
# Messages, progress bars and download links are rendered inside this widget.
log = widgets.Output()

# Lay the controls out top-to-bottom and show them in the cell output.
display(widgets.VBox([
    widgets.HTML("<b>Upload .csv or .npy (cols 0..5: track,channel,note,velocity,start_s,end_s)</b>"),
    widgets.HBox([upload, path_txt]),
    widgets.HBox([bpm_slider, tpb_slider]),
    widgets.HBox([root_dd, scale_dd]),
    render_btn,
    widgets.HTML("<b>Log</b>"),
    log
]))

# ------------- Helpers & Scales -------------
def _get_uploaded_name_and_bytes(upl):
    """Return ``(name, content)`` of the first uploaded file, or ``(None, None)``.

    Supports the ipywidgets v7 value layout (dict keyed by file name) and the
    v8 layout (sequence of file records).
    """
    uploaded = upl.value
    if not uploaded:
        return None, None

    if isinstance(uploaded, dict):  # ipywidgets v7
        filename, record = next(iter(uploaded.items()))
        return filename, record['content']

    # ipywidgets v8: try attribute access first, then mapping-style access.
    record = uploaded[0]
    filename = getattr(record, 'name', None) or record.get('name')
    payload = getattr(record, 'content', None) or record.get('content')
    return filename, payload

# Pitch class (0-11) of each root name offered by the Root dropdown.
NOTE_TO_SEMITONE = {'C':0,'C#':1,'D':2,'D#':3,'E':4,'F':5,'F#':6,'G':7,'G#':8,'A':9,'A#':10,'B':11}

# Extended scale set (12-TET approximations as needed)
# Each entry lists the semitone offsets from the root that belong to the scale;
# build_allowed_pitches keeps the MIDI notes whose offset modulo 12 is listed.
# Several names intentionally share the same interval pattern.
SCALE_INTERVALS = {
    "Chromatic (12-tone)": list(range(12)),
    "Ionian (Major)"            : [0, 2, 4, 5, 7, 9, 11],
    "Dorian"                    : [0, 2, 3, 5, 7, 9, 10],
    "Phrygian"                  : [0, 1, 3, 5, 7, 8, 10],
    "Lydian"                    : [0, 2, 4, 6, 7, 9, 11],
    "Mixolydian"                : [0, 2, 4, 5, 7, 9, 10],
    "Aeolian (Natural Minor)"   : [0, 2, 3, 5, 7, 8, 10],
    "Locrian"                   : [0, 1, 3, 5, 6, 8, 10],
    "Pentatonic Major"                 : [0, 2, 4, 7, 9],
    "Pentatonic Minor"                 : [0, 3, 5, 7, 10],
    "Pentatonic Egyptian"              : [0, 2, 5, 7, 10],
    "Pentatonic Yo (Japanese)"         : [0, 2, 5, 7, 9],
    "Pentatonic In Sen (Japanese)"     : [0, 1, 5, 7, 10],
    "Pentatonic Hirajoshi (Japanese)"  : [0, 2, 3, 7, 8],
    "Pentatonic Iwato (Japanese)"      : [0, 1, 5, 6, 10],
    "Pentatonic Dorian"                : [0, 2, 3, 7, 9],
    "Blues Minor (hexatonic)" : [0, 3, 5, 6, 7, 10],
    "Blues Major (hexatonic)" : [0, 2, 3, 4, 7, 9],
    "Chinese Gong (宫)" : [0, 2, 4, 7, 9],
    "Chinese Shang (商)": [0, 2, 5, 7, 10],
    "Chinese Jiao (角)" : [0, 3, 5, 8, 10],
    "Chinese Zhi (徵)"  : [0, 2, 5, 7, 9],
    "Chinese Yu (羽)"   : [0, 3, 5, 7, 10],
    "Ethiopian Tezeta Major": [0, 2, 4, 7, 9],
    "Ethiopian Tezeta Minor": [0, 3, 5, 7, 10],
    "Ethiopian Bati Major"  : [0, 2, 4, 7, 9],
    "Ethiopian Bati Minor"  : [0, 2, 3, 7, 9],
    "Ethiopian Ambassel"    : [0, 3, 5, 7, 10],
    "Ethiopian Anchihoye"   : [0, 2, 3, 7, 9],
}
# Populate the Scale dropdown (created empty in the UI section) and select the
# chromatic scale, which render_midi treats as "no ladder remapping".
scale_dd.options = list(SCALE_INTERVALS.keys())
scale_dd.value = "Chromatic (12-tone)"

def build_allowed_pitches(root, scale, lo=0, hi=127):
    """Return the ascending MIDI notes in ``[lo, hi]`` that belong to a scale.

    A note belongs when its distance from the root's pitch class, modulo 12,
    is one of the offsets listed for ``scale`` in ``SCALE_INTERVALS``.

    Raises:
        KeyError: if ``root`` or ``scale`` is not a known name.
    """
    root_pc = NOTE_TO_SEMITONE[root]
    offsets = SCALE_INTERVALS[scale]

    def _in_scale(midi_note):
        return (midi_note - root_pc) % 12 in offsets

    return np.array(list(filter(_in_scale, range(lo, hi + 1))), dtype=int)

def map_unique_to_scale_ladder_strict(values, root, scale, base_note=None):
    """
    Map each distinct value to its own pitch on a scale ladder (1:1).

    Distinct non-null values are taken in first-appearance order and assigned
    consecutive allowed pitches of the scale, starting at the lowest allowed
    pitch, or at the first allowed pitch >= ``base_note`` when that is given
    (falling back to the lowest if none remains). Null values map to 60.

    Raises ValueError if the scale has no pitches in 0..127 or if there are
    more distinct values than pitches available from the starting point.
    Returns an int numpy array aligned with ``values``.
    """
    series = pd.Series(values)
    distinct = series.dropna().drop_duplicates().tolist()
    n_distinct = len(distinct)

    ladder = build_allowed_pitches(root, scale, lo=0, hi=127)
    if ladder.size == 0:
        raise ValueError(f"Scale '{scale}' with root '{root}' has no allowed pitches in 0–127.")

    if base_note is None:
        first = 0
    else:
        first = int(np.searchsorted(ladder, np.clip(int(base_note), 0, 127)))
    room = max(0, ladder.size - first)
    if room <= 0:
        # base_note sits above every allowed pitch: restart from the bottom.
        first, room = 0, ladder.size

    if n_distinct > room:
        raise ValueError(f"Too many notes: {n_distinct} unique IDs, but only {room} pitches available "
                         f"for scale '{scale}' in MIDI 0–127. Choose a larger scale or reduce uniqueness.")

    lookup = dict(zip(distinct, ladder[first:first + n_distinct]))
    return series.map(lookup).fillna(60).astype(int).to_numpy()

# ---------------- Render with progress bars ----------------
def render_midi(_btn=None):
    """Render the selected note table to a type-1 MIDI file and offer it for download.

    Input comes from the upload widget or, if nothing was uploaded, from the
    path text box. The table needs at least six columns:
    track, channel, note, velocity, start_s, end_s. Rows whose track, start or
    end cannot be parsed as numbers, or whose end is not after the start, are
    dropped.

    With the chromatic scale, note values are clipped to 0..127 and used as
    MIDI pitches directly. With any other scale, every distinct note value is
    mapped 1:1 onto the scale ladder; too many distinct values is reported as
    an error rather than silently merged.

    Output is written as ``<input stem>_ViSoND.mid``. All messages, progress
    bars and links go to the ``log`` widget; errors are printed there and never
    propagate out of the callback.

    Args:
        _btn: Button instance passed by ``on_click``; unused.
    """
    with log:
        log.clear_output()
        try:
            # Load from upload or path
            name, content = _get_uploaded_name_and_bytes(upload)
            if content:
                source = io.BytesIO(content)
            else:
                p = (path_txt.value or "").strip()
                if not p:
                    print("No file uploaded AND no path provided."); return
                name = p
                source = p

            lower_name = str(name).lower()
            if lower_name.endswith('.csv'):
                df = pd.read_csv(source, header=None)
            elif lower_name.endswith('.npy'):
                # NOTE(security): allow_pickle=True executes pickled objects in
                # the file. It is kept because the matrix cell saves an object
                # array; only load .npy files you trust.
                arr = np.load(source, allow_pickle=True)
                df = pd.DataFrame(arr)
            else:
                print("Unsupported file type (use .csv or .npy)."); return

            if df.shape[1] < 6:
                print(f"Need ≥6 columns (track,channel,note,velocity,start_s,end_s). Got {df.shape[1]}."); return

            # Clean columns
            df = df.copy()
            df[0] = pd.to_numeric(df[0], errors="coerce").astype("Int64")
            df[1] = pd.to_numeric(df[1], errors="coerce").fillna(0).clip(0,15).astype(int)
            # Note ids are NOT clipped yet: clipping before the ladder mapping
            # would merge every id above 127 into one pitch and hide overflow.
            df[2] = pd.to_numeric(df[2], errors="coerce").fillna(60)
            df[3] = pd.to_numeric(df[3], errors="coerce").fillna(64).clip(1,127).astype(int)
            df[4] = pd.to_numeric(df[4], errors="coerce")
            df[5] = pd.to_numeric(df[5], errors="coerce")
            df = df.dropna(subset=[0,4,5])
            df = df[df[5] > df[4]].copy()

            # Apply strict unique scale mapping (no extra ordering)
            scale_name = scale_dd.value or "Chromatic (12-tone)"
            if str(scale_name).startswith("Chromatic"):
                df[2] = df[2].clip(0,127).astype(int)
            else:
                # Truncate so fractional ids group the same way an int cast would.
                df[2] = map_unique_to_scale_ladder_strict(np.trunc(df[2]), root_dd.value, scale_name)
                print(f"Applied unique ladder: {len(np.unique(df[2]))} unique pitches in '{root_dd.value} {scale_name}'")

            n_rows = len(df)
            n_events_total = 2 * n_rows
            print(f"Notes: {n_rows:,}  |  MIDI events (on+off): {n_events_total:,}")

            MAX_EVENTS = 20_000_000
            if n_events_total > MAX_EVENTS:
                print(f"Too many events (> {MAX_EVENTS:,}). Reduce data or raise MAX_EVENTS."); return

            # Prepare MIDI
            bpm = bpm_slider.value
            tpb = tpb_slider.value
            tempo = mido.bpm2tempo(bpm)  # microseconds per quarter note
            beats_per_second = 1_000_000 / tempo
            mid = mido.MidiFile(type=1, ticks_per_beat=tpb)

            # Track 0: tempo/meta
            meta = mido.MidiTrack()
            meta.append(MetaMessage('set_tempo', tempo=tempo, time=0))
            mid.tracks.append(meta)

            # Progress bars
            trk_ids = sorted(df[0].astype(int).unique().tolist())
            prog_tracks = widgets.IntProgress(min=0, max=len(trk_ids), value=0, description='Tracks')
            avg_events = max(1, n_events_total // max(1,len(trk_ids)))
            prog_events = widgets.IntProgress(min=0, max=avg_events, value=0, description='Events')
            status = widgets.HTML("Starting…")
            display(prog_tracks, prog_events, status)

            # Build tracks
            for ti, trk_id in enumerate(trk_ids, start=1):
                status.value = f"<b>Track {ti}/{len(trk_ids)}</b> (ID {trk_id}) — preparing…"
                td = df[df[0] == trk_id].sort_values(4)

                ch   = td[1].to_numpy(dtype=int)
                note = td[2].to_numpy(dtype=int)
                velocity = td[3].to_numpy(dtype=int)
                t_on = td[4].to_numpy(dtype=float)
                t_off= td[5].to_numpy(dtype=float)

                on_ticks  = (t_on  * tpb * beats_per_second).round().astype(int)
                off_ticks = (t_off * tpb * beats_per_second).round().astype(int)
                # A short note can round to the same tick for on and off. Events
                # are sorted off-before-on within a tick, so that note's off
                # would precede its on and the note would never be released.
                off_ticks = np.maximum(off_ticks, on_ticks + 1)

                n = len(td)
                # Columns: tick, is_note_on, note, velocity, channel.
                ev = np.empty((2*n, 5), dtype=int)
                ev[0::2, 0] = on_ticks; ev[0::2, 1] = 1; ev[0::2, 2] = note; ev[0::2, 3] = velocity; ev[0::2, 4] = ch
                ev[1::2, 0] = off_ticks; ev[1::2, 1] = 0; ev[1::2, 2] = note; ev[1::2, 3] = 0;   ev[1::2, 4] = ch

                # Sort by tick, note-offs before note-ons within the same tick.
                order = np.lexsort((ev[:,1], ev[:,0]))
                ev = ev[order]

                prog_events.max = max(1, ev.shape[0])
                prog_events.value = 0
                status.value = f"<b>Track {ti}/{len(trk_ids)}</b> — writing {ev.shape[0]:,} events…"

                tr = mido.MidiTrack(); mid.tracks.append(tr)
                prev = 0
                STEP = 5000  # update the progress bar every STEP events
                for i in range(ev.shape[0]):
                    tick, onoff, nte, vvv, chh = ev[i]
                    # MIDI messages carry delta time since the previous event.
                    dt = tick - prev
                    if dt < 0: dt = 0
                    tr.append(mido.Message('note_on' if onoff == 1 else 'note_off',
                                           note=int(nte), velocity=int(vvv),
                                           time=int(dt), channel=int(chh)))
                    prev = tick
                    if (i % STEP) == 0:
                        prog_events.value = i

                prog_events.value = ev.shape[0]
                prog_tracks.value = ti
                status.value = f"Finished Track {ti}/{len(trk_ids)}"

            out_name = (str(name).rsplit('.',1)[0] or 'visond') + '_ViSoND.mid'
            mid.save(out_name)
            status.value = f"<b>Done.</b> Saved {out_name}"
            display(FileLink(out_name))
            display(HTML(f'<a href="/files/{out_name}" download>Download {out_name}</a>'))

        except ValueError as e:
            print(str(e))
        except Exception as e:
            print(f"Error: {type(e).__name__}: {e}")

# Init scales
# Repeats the assignment made right after SCALE_INTERVALS is defined, so the
# dropdown ends up populated with "Chromatic (12-tone)" selected.
scale_dd.options = list(SCALE_INTERVALS.keys())
scale_dd.value = "Chromatic (12-tone)"

# Run render_midi whenever the "Render MIDI" button is pressed.
render_btn.on_click(render_midi)
print("UI ready: upload or path → set options → Render MIDI")

VBox(children=(HTML(value='<b>Upload .csv or .npy (cols 0..5: track,channel,note,velocity,start_s,end_s)</b>')…

UI ready: upload or path → set options → Render MIDI
