In [15]:
import os
import tomllib
from io import BytesIO
from pprint import pprint

import librosa
import numpy as np
import pandas as pd
import requests
import scipy
import scipy.signal
import scipy.stats
import spotipy
from pydub import AudioSegment
from spotipy.oauth2 import SpotifyClientCredentials

from poc_test import estimate_tempo
from SpotifyTrack import SpotifyTrack

# Read the [spotify] table of cfg.toml; the file only needs to stay open for the parse
with open("cfg.toml", "rb") as cfg:
    keys = tomllib.load(cfg)["spotify"]

# Build the client-credentials auth manager and the Spotify client used by later cells
c_id, c_secret = keys["client_id"], keys["client_secret"]
auth_manager = SpotifyClientCredentials(client_id=c_id, client_secret=c_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [16]:
# Album under test: exactly one (album_name, album_uri) pair is active; the
# commented-out pairs are alternative albums that can be swapped in.
# The Weeknd's "After Hours" 4yP0hdKOZPNshxUOjY0cZj
#(album_name, album_uri) = ('harvest','spotify:album:7hIjNhXoPqyARBpaKpS3nk')
# (album_name, album_uri) = ('sdp_my_eyes', 'spotify:album:2nhThWvrfDbjXvRfuwlsCE')
album_name, album_uri = 'SOS', 'spotify:album:07w0rG5TETcyihsEIZR3qG'
#(album_name, album_uri) = ('Favorite Worst Nightmare (Standard Edition)', 'spotify:album:6rsQnwaoJHxXJRCDBPkBRw')
#(album_name, album_uri) = ('Arcane Season 2 OST', 'spotify:album:23kFyuuRMapEZv6ReQUEit')
# (album_name, album_uri) = ('avril_lavigne', 'spotify:album:3zXjR3y2dUWklKmmp6lEhy')
# (album_name, album_uri) = ('travy_patty', 'spotify:album:18NOKLkZETa4sWwLMIm0UZ')

# The album ID is the last colon-separated field of the URI
tracks_raw = sp.album_tracks(album_id=album_uri.rsplit(':', 1)[-1], market='CA')

# Wrap each raw track dict from the response in a SpotifyTrack
tracks = [SpotifyTrack(**item) for item in tracks_raw['items']]

In [17]:
#display raw track metadata
#tracks_raw

In [18]:
#display the SpotifyTrack objects that were created in memory
#tracks

In [19]:
#save the spotify wavs from the album
def save_wavs(tracks):
    """Download each track's preview clip and save it as ``wavs/<name>.wav``.

    The file name is the track name with spaces and question marks removed.

    Parameters
    ----------
    tracks : iterable
        Objects exposing ``name`` and ``preview_url`` attributes.

    Raises
    ------
    ValueError
        If any track has no preview URL. This is checked for all tracks up
        front, before anything is downloaded or written.
    requests.HTTPError
        If a preview download comes back with an HTTP error status.
    """
    tracks = list(tracks)

    # Fail fast with a readable message instead of crashing inside requests.get(None)
    missing = [track.name for track in tracks if not track.preview_url]
    if missing:
        raise ValueError(f"no preview_url for track(s): {', '.join(missing)}")

    # The export below does not create the output directory itself
    os.makedirs('wavs', exist_ok=True)

    for track in tracks:
        r = requests.get(track.preview_url, timeout=30)
        # Do not hand an HTTP error page to the MP3 decoder
        r.raise_for_status()
        audio = AudioSegment.from_mp3(BytesIO(r.content))
        wav_name = track.name.replace(' ', '').replace('?', '')
        audio.export(f'wavs/{wav_name}.wav', format='wav')

In [20]:
# Download every track's preview clip and write it under wavs/ (network + disk I/O)
save_wavs(tracks)

In [21]:
# Dynamic-programming beat tracking (a reverse-engineered implementation).
# Converted and adapted from the paper at
# https://www.tandfonline.com/doi/epdf/10.1080/09298210701653344?needAccess=true and from the librosa documentation.

def compute_localscore(onset_envelope, period):
    """Smooth the normalized onset envelope with a Gaussian window.

    Parameters
    ----------
    onset_envelope : np.ndarray
        Onset-strength values, one per frame.
    period : number
        Beat period in frames. The window covers offsets ``-period..period``
        and has a standard deviation of ``period / 32`` frames.

    Returns
    -------
    np.ndarray
        Local score with the same length as ``onset_envelope``.
    """
    window = np.exp(-0.5 * (np.arange(-period, period + 1) * 32 / period) ** 2)

    # Scale to unit sample standard deviation. A constant (e.g. silent) or
    # single-frame envelope has a zero/undefined deviation; leave it unscaled
    # rather than filling the score with NaN/inf.
    spread = onset_envelope.std(ddof=1)
    if np.isfinite(spread) and spread > 0:
        normalized_onset = onset_envelope / spread
    else:
        normalized_onset = onset_envelope

    return scipy.signal.convolve(normalized_onset, window, "same")

def dynamic_compute(local_score, period, tightness):
    """Run the beat-tracking dynamic program over a local score.

    Parameters
    ----------
    local_score : np.ndarray
        Per-frame score (see ``compute_localscore``).
    period : number
        Target beat period in frames.
    tightness : number
        Weight of the penalty for beat intervals that deviate from ``period``.

    Returns
    -------
    backlink : np.ndarray of int
        For each frame, the frame index of its best preceding beat. Frames
        before the first significant onset hold -1; other entries can also be
        negative when the best predecessor lies before frame 0.
    cumulative_score : np.ndarray
        Best cumulative score of a beat sequence ending at each frame.
    """
    # Initialise the DP arrays with zeros
    backlink = np.zeros_like(local_score, dtype=int)
    cumulative_score = np.zeros_like(local_score)

    # Candidate predecessor frames, from 2*period back to round(period/2) back.
    # The array is shifted forward by one on every iteration.
    window = np.arange(-2 * period, -np.round(period / 2) + 1, dtype=int)

    # Log-squared penalty on the interval; it is zero for an interval of exactly `period`
    tx_cost = -tightness * (np.log(-window / period) ** 2)

    # Loop-invariant threshold for "no real onset yet". Computed once instead of
    # rescanning local_score on every frame; the guard keeps empty input working.
    onset_floor = 0.001 * local_score.max() if len(local_score) else 0.0

    first_beat = True
    for i, score_i in enumerate(local_score):
        # Index of the first candidate at frame >= 0; earlier candidates fall
        # before t = 0 and contribute only their transition cost
        z_pad = np.maximum(0, min(-window[0], len(window)))
        scores = tx_cost.copy()
        scores[z_pad:] = scores[z_pad:] + cumulative_score[window[z_pad:]]
        beat_location = np.argmax(scores)  # best predecessor within the window
        cumulative_score[i] = score_i + scores[beat_location]

        # Special case for initialisation: no backlink until the first significant onset
        if first_beat and score_i < onset_floor:
            backlink[i] = -1
        else:
            backlink[i] = window[beat_location]
            first_beat = False

        # Slide the candidate window forward one frame
        window += 1
    return backlink, cumulative_score

def compute_tempo(audio_file_path, hop_len=512, tightness=32.0, initial_bpm = 120):
    """Estimate a global tempo (BPM) by tracking beats with dynamic programming.

    Beats are tracked first; the tempo is then derived from the mean interval
    between the first and last tracked beat.

    Parameters
    ----------
    audio_file_path : str
        Path to the audio file to analyse (the notebook passes the saved .wav).
    hop_len : int
        Hop length in samples between onset-envelope frames.
    tightness : float
        How closely the tracker adheres to the initial tempo (default 32.0).
        Original author's note: lower values seemed to work better.
    initial_bpm : number
        Starting tempo guess used to derive the beat period in frames.

    Returns
    -------
    numpy.float64
        Estimated tempo in BPM, or NaN when fewer than two beats are found.
    """
    # Hard-code the sampling rate to 11025 Hz (librosa resamples on load)
    signal, sampling_rate = librosa.load(audio_file_path, sr=11025, mono=True)
    print(f"sampling rate: {sampling_rate}")

    onset_envelope = librosa.onset.onset_strength(y=signal, sr=sampling_rate, hop_length=hop_len)

    # Onset-envelope frames per second, and the beat period (in frames) implied by initial_bpm
    frame_rate = sampling_rate / hop_len
    period = np.round(60.0 * frame_rate / initial_bpm)

    # Normalise and smooth the onset envelope, then run the dynamic program
    local_score = compute_localscore(onset_envelope, period)
    backlink, cumulative_score = dynamic_compute(local_score, period, tightness)

    # Last beat: the latest local maximum whose doubled cumulative score exceeds
    # the median cumulative score over all local maxima
    local_max_scores = librosa.util.localmax(cumulative_score)
    peak_frames = np.argwhere(local_max_scores)
    if peak_frames.size == 0:
        return np.float64(np.nan)
    median_score = np.median(cumulative_score[peak_frames])
    candidates = np.argwhere(cumulative_score * local_max_scores * 2 > median_score)
    if candidates.size == 0:
        return np.float64(np.nan)
    last_beat_location = candidates.max()

    # Follow the backlinks from the last beat until one points before frame 0
    beats = [last_beat_location]
    while backlink[beats[-1]] >= 0:
        beats.append(backlink[beats[-1]])

    # Put the beats in ascending order as an array of frame numbers
    beats = np.array(beats[::-1], dtype=int)

    # A single beat has no interval to measure (would otherwise be 0/0)
    if len(beats) < 2:
        return np.float64(np.nan)

    # Convert with the same hop length that produced the frames; the library
    # default only matches when hop_len == 512
    first_beat_time, last_beat_time = librosa.frames_to_time(
        (beats[0], beats[-1]), sr=sampling_rate, hop_length=hop_len
    )
    mean_beat_interval = (last_beat_time - first_beat_time) / (len(beats) - 1)
    tempo = 60 / mean_beat_interval
    return tempo

#Testing time
#print(f"Dynamic Programming BPM method computed: {compute_tempo("mp3s/505_artic.mp3", tightness = 100.0, initial_bpm=120)}")

In [22]:
# Run the dynamic-programming estimator on every saved preview WAV.
# The file name is the track name with spaces and question marks removed.
DP_method_tempos = [
    compute_tempo('wavs/' + track.name.replace(' ', '').replace('?', '') + '.wav').item()
    for track in tracks
]

sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025
sampling rate: 11025


In [23]:
# Get Spotify's reported tempo for every track.
# audio_features accepts a list of track IDs (at most 100 per call), so fetch
# them in batches instead of issuing one HTTP request per track.
track_ids = [track.id for track in tracks]
acc_tempos = [
    features.get('tempo')
    for start in range(0, len(track_ids), 100)
    for features in sp.audio_features(track_ids[start:start + 100])
]

In [24]:
# Side-by-side table of our estimate and Spotify's tempo, one row per track
pd.DataFrame(
    list(zip(DP_method_tempos, acc_tempos)),
    columns=['OurAlgoTempo', 'SpotifyTempo'],
    index=[t.name for t in tracks],
)

Unnamed: 0,OurAlgoTempo,SpotifyTempo
SOS,120.043369,119.159
Kill Bill,117.823187,88.993
Seek & Destroy,114.84375,152.069
Low,126.346239,145.044
Love Language,124.112635,65.038
Blind,110.043499,114.225
Used (feat. Don Toliver),116.709278,150.015
Snooze,119.134415,143.008
Notice Me,118.945313,159.919
Gone Girl,121.886055,150.093


In [25]:
# Relative error of the DP estimate against Spotify's tempo, per track,
# then the album-wide mean as a percentage
dp_errors = [
    abs(DP_method_tempos[idx] - spotify_tempo) / spotify_tempo
    for idx, spotify_tempo in enumerate(acc_tempos)
]

print(f"average error of DP method: {np.mean(dp_errors)*100.0:0.2f}%")

average error of DP method: 23.30%


In [26]:
def beat_mean_method(audio_file_path):
    """Estimate tempo (BPM) from the mean interval between librosa's tracked beats.

    The file is loaded at its native sampling rate as mono, beats are tracked
    with ``librosa.beat.beat_track``, and the tempo is 60 divided by the mean
    interval between the first and last beat.

    Returns
    -------
    numpy.float64
        Estimated tempo in BPM, or NaN when fewer than two beats are tracked.
    """
    y, sr = librosa.load(audio_file_path, sr=None, mono=True)
    _, beats = librosa.beat.beat_track(y=y, sr=sr)

    # With zero beats beats[0] would raise IndexError; with one beat the mean
    # interval would be 0/0. Neither case has a measurable tempo.
    if len(beats) < 2:
        return np.float64(np.nan)

    first_beat_time, last_beat_time = librosa.frames_to_time((beats[0], beats[-1]), sr=sr)
    mean_beat_interval = (last_beat_time - first_beat_time) / (len(beats) - 1)
    tempo = 60 / mean_beat_interval
    return tempo

def beat_track_method(audio_file_path):
    """Return the tempo that ``librosa.beat.beat_track`` reports for the file.

    The audio is loaded as mono at its native sampling rate; the beat
    positions that ``beat_track`` also returns are discarded.
    """
    samples, native_sr = librosa.load(audio_file_path, sr=None, mono=True)
    estimate = librosa.beat.beat_track(y=samples, sr=native_sr)
    return estimate[0]

In [None]:
# Tempo estimates from the two librosa-based baselines, one entry per track.
# The file name is the track name with spaces and question marks removed.
bm_tempos = [
    beat_mean_method('wavs/' + track.name.replace(' ', '').replace('?', '') + '.wav').item()
    for track in tracks
]
bt_tempos = [
    beat_track_method('wavs/' + track.name.replace(' ', '').replace('?', '') + '.wav').item()
    for track in tracks
]

# Relative error of each baseline against Spotify's reported tempo
bm_errors = [abs(bm_tempos[idx] - ref) / ref for idx, ref in enumerate(acc_tempos)]
bt_errors = [abs(bt_tempos[idx] - ref) / ref for idx, ref in enumerate(acc_tempos)]

print(f"average error of BM method: {np.mean(bm_errors)*100.0:0.2f}%")
print(f"average error of BT method: {np.mean(bt_errors)*100.0:0.2f}%")
print(f"average error of DP method: {np.mean(dp_errors)*100.0:0.2f}%")

# A "miss" is an estimate whose relative error is at least `threshold`
threshold = 0.03
bm_miss = sum(
    1 for idx, ref in enumerate(acc_tempos)
    if abs((bm_tempos[idx] - ref) / ref) >= threshold
)
bt_miss = sum(
    1 for idx, ref in enumerate(acc_tempos)
    if abs((bt_tempos[idx] - ref) / ref) >= threshold
)
dp_miss = sum(
    1 for idx, ref in enumerate(acc_tempos)
    if abs((DP_method_tempos[idx] - ref) / ref) >= threshold
)

print(f"total misses for BM method: {bm_miss}/{len(acc_tempos)}")
print(f"total misses for BT method: {bt_miss}/{len(acc_tempos)}")
print(f"total misses for DP method: {dp_miss}/{len(acc_tempos)}")

# All three estimates next to Spotify's tempo, one row per track
pd.DataFrame(
    list(zip(DP_method_tempos, bm_tempos, bt_tempos, acc_tempos)),
    columns=['DP method','BM method', 'BT Method', 'SpotifyTempo'],
    index=[t.name for t in tracks],
)


average error of BM method: 15.93%
average error of BT method: 16.16%
average error of DP method: 23.30%


TypeError: cannot unpack non-iterable int object