In [3]:
import json
import os
import tomllib

import librosa
import numpy as np
import scipy
import scipy.signal
import scipy.stats
import spotipy
from pydub import AudioSegment
from spotipy.oauth2 import SpotifyClientCredentials

# Read the Spotify API credentials from the [spotify] table of cfg.toml.
with open("cfg.toml", "rb") as cfg:
    keys = tomllib.load(cfg)["spotify"]
    c_id, c_secret = keys["client_id"], keys["client_secret"]

# Build the client-credentials auth manager and the shared API client used by
# the lookup helpers in the following cells.
auth_manager = SpotifyClientCredentials(client_id=c_id, client_secret=c_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [None]:
#Spotify API info fetching

def get_song_name(track_id):
    """Return the title of the Spotify track identified by ``track_id``.

    Fetches the full track object through the module-level ``sp`` client and
    returns only its ``"name"`` field.
    """
    return sp.track(track_id)["name"]

def get_song_tempo(track_id):
    """Return the tempo Spotify reports for ``track_id``.

    Side effect: prints the track ID and the track name (one extra API call via
    ``get_song_name``) before returning.

    Returns the ``"tempo"`` entry of the first audio-features record, the
    string ``"Tempo not found"`` if that record has no tempo, or an explanatory
    string when no features are available at all.
    """
    audio_features = sp.audio_features(track_id)
    print(f"Song Track ID: {track_id}")
    print(f"Song Name: {get_song_name(track_id)}")

    # Guard clause: an empty response or an empty first record means no features.
    if not audio_features or not audio_features[0]:
        return 'Invalid track ID or no features available'
    return audio_features[0].get("tempo", "Tempo not found")

def get_track_id(song_name):
    """Search Spotify for ``song_name`` and return the ID of the top track hit.

    Only the first search result is requested. When the search yields no
    tracks, the string ``"Did not find <song_name>"`` is returned in place of
    an ID, so callers should be aware the result is not always a valid ID.
    """
    response = sp.search(q=song_name, type="track", limit=1)
    matches = response.get("tracks", {}).get("items", [])
    if not matches:
        return f"Did not find {song_name}"
    return matches[0]['id']

In [None]:
#helper conversion function
def mp3_to_wav(mp3_path, wav_path):
    """Decode the MP3 at ``mp3_path`` with pydub and write it to ``wav_path`` as WAV."""
    AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")

In [45]:
def method1(audio_file_path):
    """Print two BPM estimates for an MP3 using librosa's beat tracker.

    The file is first converted to ``temp_audio.wav`` (overwritten on every
    call) and loaded at its native sampling rate as mono. Two figures are
    printed:

    1. the global tempo returned by ``librosa.beat.beat_track``;
    2. a tempo derived from the detected beat positions, i.e. 60 divided by
       the average spacing between the first and the last beat.

    Parameters
    ----------
    audio_file_path : str
        Path to the MP3 file to analyse.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    wav_path = "temp_audio.wav"
    mp3_to_wav(audio_file_path, wav_path)
    y, sr = librosa.load(wav_path, sr=None, mono=True)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)

    # Depending on the librosa version, `tempo` is either a plain scalar or a
    # one-element array; np.atleast_1d makes the indexing valid for both.
    global_tempo = np.atleast_1d(tempo)[0]
    print(f"ESTIMATED GLOBAL TEMPO BY LIBROSA: {global_tempo} BPM" )

    # At least two beats are needed to measure a beat interval; without this
    # guard an empty result raises IndexError and a single beat yields NaN.
    if len(beats) < 2:
        print("Fewer than two beats detected; cannot compute BPM from beat positions")
        return

    first_beat_time, last_beat_time = librosa.frames_to_time((beats[0],beats[-1]),sr=sr)
    # NOTE: despite the label printed below, this is derived from the *mean*
    # beat interval (total span / number of intervals), not a median.
    tempo2 = 60/((last_beat_time-first_beat_time)/(len(beats)-1))

    print(f"BPM computed from median of beats: {tempo2} BPM")

def method2(audio_file_path):
    """Print a tempo estimate computed from the onset-strength envelope.

    The file is first converted to ``temp_audio.wav`` (overwritten on every
    call) and loaded at its native sampling rate as mono.

    Parameters
    ----------
    audio_file_path : str
        Path to the MP3 file to analyse.

    Returns
    -------
    None
        The estimate is printed, not returned.
    """
    wav_path = "temp_audio.wav"
    mp3_to_wav(audio_file_path, wav_path)
    y, sr = librosa.load(wav_path, sr=None, mono=True)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)

    # librosa moved the tempo estimator from `librosa.beat.tempo` to
    # `librosa.feature.tempo` and deprecated the old location. Prefer the new
    # one and fall back so older installs keep working.
    estimate_tempo = getattr(librosa.feature, "tempo", None)
    if estimate_tempo is None:
        estimate_tempo = librosa.beat.tempo

    tempo = estimate_tempo(onset_envelope=onset_env, sr=sr)
    print(f"ESTIMATE TEMPO BY Librosa.beat.tempo(): {tempo}")

# Run the beat-tracker-based estimate on the sample file; method1 prints both
# of its BPM figures itself. The method2 call is left disabled.
method1("mp3s/505_artic.mp3")
#method2("mp3s/isha_eason_chan.mp3")


ESTIMATED GLOBAL TEMPO BY LIBROSA: 140.625 BPM
BPM computed from median of beats: 140.49201757801637 BPM


In [None]:
# Dynamic-programming beat tracker, reverse engineered from librosa.
# Code converted and adapted from the following paper and from the librosa documentation:
# https://www.tandfonline.com/doi/epdf/10.1080/09298210701653344?needAccess=true

def compute_localscore(onset_envelope, period):
    """Smooth the normalised onset envelope with a Gaussian window.

    Parameters
    ----------
    onset_envelope : np.ndarray
        One-dimensional onset-strength envelope.
    period : int or float
        Expected beat period in frames; sets the window length
        (``2 * period + 1`` samples) and its width.

    Returns
    -------
    np.ndarray
        Local score with the same length as ``onset_envelope``.
    """
    # Gaussian kernel sampled on [-period, period].
    window = np.exp(-0.5 *(np.arange(-period, period+1) * 32/period)**2)

    # Scale to unit (sample) standard deviation. A flat envelope has zero
    # spread; dividing by it would fill the result with NaN, so leave it as is.
    spread = onset_envelope.std(ddof = 1)
    normalized_onset = onset_envelope/spread if spread > 0 else onset_envelope

    return scipy.signal.convolve(normalized_onset, window, "same")

def dynamic_compute(local_score, period, tightness):
    """Run the beat-tracking dynamic program over ``local_score``.

    For every frame the best preceding beat is searched in a window reaching
    from ``2 * period`` frames back to about ``period / 2`` frames back. Each
    candidate is penalised by ``tightness * log(gap / period) ** 2`` so that
    gaps close to ``period`` are preferred.

    Parameters
    ----------
    local_score : np.ndarray
        One-dimensional per-frame score (see ``compute_localscore``).
    period : int or float
        Expected beat period in frames.
    tightness : float
        Weight of the penalty for deviating from ``period``; must be > 0.

    Returns
    -------
    backlink : np.ndarray of int
        For each frame, the index of the best preceding beat frame; negative
        values mark frames with no predecessor and end a backtrace.
    cumulative_score : np.ndarray
        Best cumulative score of a beat sequence ending at each frame.

    Raises
    ------
    ValueError
        If ``tightness`` is not strictly positive.
    """
    if tightness <= 0:
        raise ValueError("tightness must be strictly positive")

    # DP tables, one entry per frame.
    backlink = np.zeros_like(local_score, dtype = int)
    cumulative_score = np.zeros_like(local_score)

    # Offsets (relative to frame 0) of the candidate predecessors; shifted by
    # one on every iteration so it always trails the current frame.
    window = np.arange(-2*period, -np.round(period/2)+1, dtype=int)

    # Transition cost per candidate: zero at exactly one period back.
    tx_cost = -tightness * (np.log(-window/period) **2)

    # Loop-invariant threshold below which leading frames are not treated as
    # the first beat. Hoisted so the O(n) max is not recomputed per frame.
    onset_threshold = 0.001*local_score.max() if len(local_score) else 0.0

    first_beat = True
    for i, score_i in enumerate(local_score):
        # Number of leading candidates that would fall before t = 0; those
        # keep their bare transition cost (no cumulative score is added).
        z_pad = np.maximum(0, min(-window[0], len(window)))
        scores = tx_cost.copy()
        scores[z_pad:] = scores[z_pad:]+cumulative_score[window[z_pad:]]

        # Best predecessor and resulting cumulative score for this frame.
        beat_location = np.argmax(scores)
        cumulative_score[i] = score_i + scores[beat_location]

        # Until the first sufficiently strong frame is seen, mark frames as
        # having no predecessor.
        if first_beat and score_i < onset_threshold:
            backlink[i] = -1
        else:
            backlink[i] = window[beat_location]
            first_beat = False

        # Slide the search window forward by one frame.
        window += 1
    return backlink, cumulative_score

def compute_tempo(audio_file_path, hop_len=512, tightness=100.0, initial_bpm = 120):
    """Estimate a global BPM for an MP3 with the dynamic-programming beat tracker.

    The file is converted to ``temp_audio.wav`` (overwritten on every call),
    loaded at its native sampling rate as mono, and turned into an onset
    envelope. Beats are found with ``compute_localscore`` and
    ``dynamic_compute``, then the tempo is taken as 60 divided by the average
    spacing between the first and last tracked beat.

    Parameters
    ----------
    audio_file_path : str
        Path to the MP3 file to analyse.
    hop_len : int, default 512
        Hop length (in samples) between onset-envelope frames.
    tightness : float, default 100.0
        How closely the tracked beats must follow the expected period.
    initial_bpm : float, default 120
        Starting tempo guess used to derive the expected beat period.

    Returns
    -------
    float
        Estimated tempo in beats per minute.
    """
    # Decode the audio and compute its onset-strength envelope.
    wav_path = "temp_audio.wav"
    mp3_to_wav(audio_file_path, wav_path)
    signal, sampling_rate = librosa.load(wav_path, sr=None, mono=True)
    onset_envelope = librosa.onset.onset_strength(y=signal, sr = sampling_rate, hop_length=hop_len)

    # Envelope frames per second, and the expected beat period in frames for
    # the initial tempo guess.
    frame_rate = sampling_rate/hop_len
    period = np.round(60.0 *frame_rate/initial_bpm)

    # Normalise and smooth the envelope, then run the dynamic program.
    local_score = compute_localscore(onset_envelope, period)
    backlink, cumulative_score = dynamic_compute(local_score, period, tightness)

    # The last beat is the final local maximum of the cumulative score whose
    # doubled value exceeds the median over all local maxima.
    local_max_scores = librosa.util.localmax(cumulative_score)
    median_score = np.median(cumulative_score[np.argwhere(local_max_scores)])
    last_beat_location = np.argwhere((cumulative_score*local_max_scores*2>median_score)).max()

    # Follow the backlinks from the last beat until a negative link is hit.
    beats = [last_beat_location]
    while backlink[beats[-1]] >= 0:
        beats.append(backlink[beats[-1]])

    # Put the beats in ascending order as an array of frame numbers.
    beats = np.array(beats[::-1], dtype=int)

    # The frame indices are in units of `hop_len` samples, so the same hop
    # length must be used here; otherwise any hop_len other than
    # frames_to_time's default would produce a wrongly scaled BPM.
    first_beat_time, last_beat_time = librosa.frames_to_time(
        (beats[0],beats[-1]), sr=sampling_rate, hop_length=hop_len
    )
    tempo = 60/((last_beat_time-first_beat_time)/(len(beats)-1))
    return tempo

#Testing time
# Single quotes inside the f-string: reusing the outer double quotes is only
# legal from Python 3.12 on and is a SyntaxError on earlier versions.
print(f"Dynamic Programming BPM method computed: {compute_tempo('mp3s/505_artic.mp3', tightness = 100.0, initial_bpm=120)}")

Dynamic Programming BPM method computed: 142.51893939393938


In [37]:
# Look up a track on Spotify by name and print its tempo, ID and title.
test_var = "505"
test_id = get_track_id(test_var)
# get_song_tempo prints the track ID and name itself before its return value
# is printed by this line.
print(f"Spotify's Tempo: {get_song_tempo(test_id)}")
print(f"Track ID: {test_id}")
print(f"sanity check using get_song_name: {get_song_name(test_id)}")

Song Track ID: 58ge6dfP91o9oXMzq3XkIS
Song Name: 505
Spotify's Tempo: 140.267
Track ID: 58ge6dfP91o9oXMzq3XkIS
sanity check using get_song_name: 505


In [14]:
# Interactive lookup: read a song name and print Spotify's tempo, ID and title.
print("\n Input your song in one line, enter qqq to quit: ")
test_var = input()

# Honor the quit sentinel advertised in the prompt instead of sending "qqq"
# to Spotify as a search query.
if test_var.strip() != "qqq":
    test_id = get_track_id(test_var)
    # get_song_tempo prints the track ID and name itself before its return
    # value is printed by this line.
    print(f"Spotify's Tempo: {get_song_tempo(test_id)}")
    print(f"Track ID: {test_id}")
    print(f"sanity check using get_song_name: {get_song_name(test_id)}")


 Input your song in one line, enter qqq to quit: 
Song Track ID: 4kjI1gwQZRKNDkw1nI475M
Song Name: MY EYES
Spotify's Tempo: 119.043
Track ID: 4kjI1gwQZRKNDkw1nI475M
sanity check using get_song_name: MY EYES
