In [1]:

import numpy as np
import librosa as lr

def _band_energy_ratio(S_power, sr, fmin, fmax):
    """Energy in [fmin,fmax] divided by total energy."""
    freqs = lr.fft_frequencies(sr=sr, n_fft=(S_power.shape[0]-1)*2)
    idx = (freqs >= fmin) & (freqs <= fmax)
    mid = S_power[idx].sum()
    tot = S_power.sum() + 1e-12
    return float(mid / tot)

def _voiced_frame_ratio(y, sr):
    """Fraction of frames where pyin finds a pitch (singing/speech-like)."""
    try:
        f0, voiced, _ = lr.pyin(
            y, fmin=65, fmax=1000,
            frame_length=2048, hop_length=512
        )
        if f0 is None:
            return 0.0
        return float(np.mean(~np.isnan(f0)))
    except Exception:
        # pyin can fail on very short or noisy audio
        return 0.0

def instrumentalness_score(y, sr, *, top_db=30, band=(300.0, 3000.0), weights=(0.6, 0.4)):
    """Return float in [0, 1]: higher means more likely instrumental.

    Two vocal cues are blended into a "vocal likeness" and the score is its
    complement: the voiced-frame ratio from ``_voiced_frame_ratio`` and the
    share of spectral energy inside ``band`` from ``_band_energy_ratio``.

    Parameters
    ----------
    y : audio samples.
    sr : sampling rate of ``y`` in Hz.
    top_db : threshold passed to ``librosa.effects.trim`` to strip leading
        and trailing silence (reduces intro/outro bias).
    band : ``(fmin, fmax)`` in Hz of the band treated as the vocal formant
        region.
    weights : ``(voiced_weight, band_weight)`` applied to the two cues. The
        result is clamped, so the weights need not sum to 1.

    Returns
    -------
    float
        Clamped ``1 - vocal_likeness``. If no samples remain after trimming
        there is no vocal evidence to measure and 1.0 is returned.
    """
    # Trim silence to reduce intro/outro bias
    y, _ = lr.effects.trim(y, top_db=top_db)
    if np.size(y) == 0:
        # Nothing to analyse: both cues would be zero, so skip the STFT/pYIN work.
        return 1.0

    # STFT power spectrogram
    S = np.abs(lr.stft(y, n_fft=2048, hop_length=512)) ** 2

    # 1) Voiced-frame ratio (singing proxy): 0..1, higher means more likely vocals
    vfr = _voiced_frame_ratio(y, sr)

    # 2) Energy ratio inside the vocal band: 0..1
    fmin, fmax = band
    mer = _band_energy_ratio(S, sr, fmin=fmin, fmax=fmax)

    # Mild concave remap of each cue on [0, 1] before blending
    vfr_c = vfr ** 0.8
    mer_c = mer ** 0.8

    voiced_weight, band_weight = weights
    vocal_likeness = voiced_weight * vfr_c + band_weight * mer_c

    # Clamp so unusual weights cannot push the score outside [0, 1]
    return float(np.clip(1.0 - vocal_likeness, 0.0, 1.0))

def instrumentalness_from_file(path, target_sr=22050, duration=None):
    """Score an audio file for instrumentalness.

    The file is loaded as mono with ``target_sr`` as the requested sample
    rate (so every track is analysed at a consistent rate), optionally
    limited to the first ``duration`` seconds, then normalised with
    ``librosa.util.normalize`` before being passed to
    ``instrumentalness_score``.

    Parameters
    ----------
    path : location of the audio file.
    target_sr : sample rate requested from the loader.
    duration : optional number of seconds to load; ``None`` loads everything.

    Returns
    -------
    Whatever ``instrumentalness_score`` returns for the loaded signal.
    """
    samples, loaded_sr = lr.load(path, duration=duration, mono=True, sr=target_sr)
    return instrumentalness_score(lr.util.normalize(samples), loaded_sr)


In [4]:

# Point AUDIO_PATH at the track to analyse; a file uploaded into the notebook runtime works too.
AUDIO_PATH = "/Users/ethanshen/Documents/GitHub/rag-music-recs/agentic-ai-music-recommendation-system/Saladik Ătjum.mp3"  # e.g., "uploads/my_track.mp3"

try:
    score = instrumentalness_from_file(AUDIO_PATH)
    print(f"Instrumentalness (no-ML): {score:.3f}")
    # >= 0.80 reads as instrumental, <= 0.20 as vocal, anything between is inconclusive.
    print(
        "Likely instrumental" if score >= 0.80
        else "Likely vocal" if score <= 0.20
        else "Uncertain"
    )
except FileNotFoundError:
    # A missing file is the expected first-run failure; tell the reader how to recover.
    print("Provide a valid AUDIO_PATH to an existing file and rerun this cell.")


Instrumentalness (no-ML): 0.493
Uncertain
