# BPM Extraction from Spotify Preview URLs

Testing librosa-based BPM extraction from Spotify's 30-second preview clips.


In [12]:
# Install required packages if needed
!pip install librosa soundfile spotipy python-dotenv requests matplotlib spotify_preview_finder


Collecting spotify_preview_finder
  Downloading spotify_preview_finder-1.2.0-py3-none-any.whl.metadata (1.4 kB)
Downloading spotify_preview_finder-1.2.0-py3-none-any.whl (3.9 kB)
Installing collected packages: spotify_preview_finder
Successfully installed spotify_preview_finder-1.2.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [14]:
import librosa
import librosa.display
import numpy as np
import requests
import tempfile
import os
from dotenv import load_dotenv
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import matplotlib.pyplot as plt
import spotify_preview_finder

load_dotenv()

True

In [17]:
dir(spotify_preview_finder.finder)

# Initialize spotify-preview-finder
# preview_finder = spotify_preview_finder.finder(
#     client_id=os.getenv('SPOTIFY_CLIENT_ID'),
#     client_secret=os.getenv('SPOTIFY_CLIENT_SECRET')
# )
# print("✓ Initialized spotify-preview-finder")

['BeautifulSoup',
 'SpotifyClientCredentials',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'create_spotify_client',
 'get_preview_urls_from_html',
 'requests',
 'search_and_get_links',
 'spotipy']

## 1. Get a sample track from Spotify


In [6]:
# Initialize Spotify client
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=os.getenv('SPOTIFY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIFY_CLIENT_SECRET'),
    redirect_uri=os.getenv('SPOTIFY_REDIRECT_URI', 'http://127.0.0.1:3000/callback'),
    scope='playlist-read-private playlist-read-collaborative',
    cache_path='.spotify_cache',
    open_browser=True
))

print("✓ Spotify client initialized")


✓ Spotify client initialized


In [7]:
# Get a playlist or use a specific track
# Option 1: Use a playlist URL
playlist_url = "https://open.spotify.com/playlist/6ENxgIEdvQK45A3sLIq6t0"

# Extract playlist ID
playlist_id = playlist_url.split('playlist/')[-1].split('?')[0]

# Get first 5 tracks from playlist
results = sp.playlist_tracks(playlist_id, limit=5)
tracks = results['items']

# Extract track info
sample_tracks = []
for item in tracks:
    if item['track']:
        track = item['track']
        sample_tracks.append({
            'title': track['name'],
            'artist': track['artists'][0]['name'],
            'preview_url': track.get('preview_url'),
            'track_id': track['id']
        })

print(f"Found {len(sample_tracks)} tracks:")
for i, track in enumerate(sample_tracks, 1):
    has_preview = "✓" if track['preview_url'] else "✗"
    print(f"{i}. {track['title']} - {track['artist']} {has_preview}")


Found 5 tracks:
1. Tell Me You Love Me - Peach Pit ✗
2. The Hunger - The Distillers ✗
3. Downstairs - Twenty One Pilots ✗
4. Flood - Flycatcher ✗
5. Sinking Feeling - LEAP ✗


## 1.5 Test spotify-preview-finder for tracks without API preview URLs


In [10]:
# Try to get preview URLs using spotify-preview-finder for tracks without API URLs
print("Testing spotify-preview-finder for tracks without API preview URLs:\n")
print(f"{'#':<4} {'Title':<35} {'Artist':<25} {'Finder Result'}")
print("-" * 80)

for i, track in enumerate(sample_tracks, 1):
    if not track['preview_url']:
        try:
            finder_url = preview_finder.get_preview_url(track['title'], track['artist'])
            result = "✓ Found" if finder_url else "✗ Not found"
            if finder_url:
                track['preview_url_from_finder'] = finder_url
        except Exception as e:
            result = f"✗ Error: {str(e)[:30]}"
    else:
        result = "N/A (has API URL)"
    
    title = track['title'][:33] + '...' if len(track['title']) > 33 else track['title']
    artist = track['artist'][:23] + '...' if len(track['artist']) > 23 else track['artist']
    print(f"{i:<4} {title:<35} {artist:<25} {result}")


Testing spotify-preview-finder for tracks without API preview URLs:

#    Title                               Artist                    Finder Result
--------------------------------------------------------------------------------
1    Tell Me You Love Me                 Peach Pit                 ✗ Error: name 'preview_finder' is not d
2    The Hunger                          The Distillers            ✗ Error: name 'preview_finder' is not d
3    Downstairs                          Twenty One Pilots         ✗ Error: name 'preview_finder' is not d
4    Flood                               Flycatcher                ✗ Error: name 'preview_finder' is not d
5    Sinking Feeling                     LEAP                      ✗ Error: name 'preview_finder' is not d


## 2. Download and analyze a single track (using preview URL from API or finder)


In [None]:
def download_preview(preview_url):
    """Download preview audio to temporary file."""
    if not preview_url:
        return None
    
    try:
        response = requests.get(preview_url, timeout=10)
        response.raise_for_status()
        
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        temp_file.write(response.content)
        temp_file.close()
        
        return temp_file.name
    except Exception as e:
        print(f"Error downloading: {e}")
        return None

# Pick a track with a preview URL
test_track = next((t for t in sample_tracks if t['preview_url']), None)

if test_track:
    print(f"Testing: {test_track['title']} - {test_track['artist']}")
    audio_file = download_preview(test_track['preview_url'])
    
    if audio_file:
        print(f"✓ Downloaded to: {audio_file}")
        file_size = os.path.getsize(audio_file) / 1024
        print(f"  File size: {file_size:.1f} KB")
    else:
        print("✗ Failed to download")
else:
    print("No tracks with preview URLs found!")


No tracks with preview URLs found!


In [9]:
# Load audio with librosa
y, sr = librosa.load(audio_file, duration=30)

print(f"Sample rate: {sr} Hz")
print(f"Audio length: {len(y)/sr:.2f} seconds")
print(f"Audio shape: {y.shape}")

# Plot waveform
plt.figure(figsize=(14, 4))
librosa.display.waveshow(y, sr=sr)
plt.title(f"Waveform: {test_track['title']}")
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.tight_layout()
plt.show()


NameError: name 'audio_file' is not defined

## 4. Calculate BPM using librosa


In [None]:
# Calculate tempo/BPM
tempo, beats = librosa.beat.beat_track(y=y, sr=sr)

# Extract BPM value
if isinstance(tempo, np.ndarray):
    bpm = float(tempo[0]) if len(tempo) > 0 else float(tempo)
else:
    bpm = float(tempo)

print(f"\n{'='*50}")
print(f"Track: {test_track['title']}")
print(f"Artist: {test_track['artist']}")
print(f"Detected BPM: {bpm:.1f}")
print(f"Number of beats detected: {len(beats)}")
print(f"{'='*50}")


## 5. Visualize beat tracking


In [None]:
# Convert beat frames to time
beat_times = librosa.frames_to_time(beats, sr=sr)

# Plot waveform with beat markers
plt.figure(figsize=(14, 4))
librosa.display.waveshow(y, sr=sr, alpha=0.6)
plt.vlines(beat_times, -1, 1, color='r', alpha=0.5, linestyle='--', label='Detected beats')
plt.title(f"Beat Tracking: {test_track['title']} (BPM: {bpm:.1f})")
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.legend()
plt.tight_layout()
plt.show()

print(f"Beat times (first 10): {beat_times[:10]}")


## 6. Test on multiple tracks


In [None]:
def calculate_bpm_from_url(preview_url):
    """Complete pipeline: download -> analyze -> return BPM."""
    try:
        # Download
        audio_file = download_preview(preview_url)
        if not audio_file:
            return None, "Download failed"
        
        # Load
        y, sr = librosa.load(audio_file, duration=30)
        
        # Calculate BPM
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        
        if isinstance(tempo, np.ndarray):
            bpm = float(tempo[0]) if len(tempo) > 0 else float(tempo)
        else:
            bpm = float(tempo)
        
        # Cleanup
        os.remove(audio_file)
        
        return bpm, None
    except Exception as e:
        return None, str(e)

# Test on all tracks
print("\nTesting BPM extraction on all tracks:\n")
print(f"{'#':<4} {'Title':<30} {'Artist':<20} {'BPM':<10} {'Status'}")
print("-" * 80)

for i, track in enumerate(sample_tracks, 1):
    if track['preview_url']:
        bpm, error = calculate_bpm_from_url(track['preview_url'])
        if bpm:
            status = f"✓ {bpm:.1f} BPM"
        else:
            status = f"✗ {error}"
    else:
        status = "✗ No preview URL"
    
    title = track['title'][:28] + '...' if len(track['title']) > 28 else track['title']
    artist = track['artist'][:18] + '...' if len(track['artist']) > 18 else track['artist']
    
    print(f"{i:<4} {title:<30} {artist:<20} {status}")


## 7. Compare with Spotify's deprecated audio features (if available)

If your app was created before Nov 2024, you can test against Spotify's original BPM data.


In [2]:
# Try to get Spotify's audio features for comparison
print("\nComparing with Spotify's audio features (if available):\n")
print(f"{'Track':<30} {'Librosa BPM':<15} {'Spotify BPM':<15} {'Difference'}")
print("-" * 80)

for track in sample_tracks:
    if not track['preview_url']:
        continue
    
    # Get librosa BPM
    librosa_bpm, _ = calculate_bpm_from_url(track['preview_url'])
    
    # Try to get Spotify's BPM
    try:
        features = sp.audio_features(track['track_id'])[0]
        spotify_bpm = features['tempo'] if features else None
    except Exception as e:
        spotify_bpm = None
        print(f"Note: Could not fetch Spotify features - {e}")
    
    title = track['title'][:28] + '...' if len(track['title']) > 28 else track['title']
    
    if librosa_bpm and spotify_bpm:
        diff = abs(librosa_bpm - spotify_bpm)
        print(f"{title:<30} {librosa_bpm:<15.1f} {spotify_bpm:<15.1f} {diff:.1f}")
    elif librosa_bpm:
        print(f"{title:<30} {librosa_bpm:<15.1f} {'N/A':<15} {'N/A'}")
    else:
        print(f"{title:<30} {'Failed':<15} {'N/A':<15} {'N/A'}")



Comparing with Spotify's audio features (if available):

Track                          Librosa BPM     Spotify BPM     Difference
--------------------------------------------------------------------------------


NameError: name 'sample_tracks' is not defined

## 8. Conclusions and Next Steps

Based on the results above:
- Does librosa successfully extract BPM from preview URLs?
- How accurate are the results compared to Spotify's data (if available)?
- What percentage of tracks have preview URLs?
- Are there any performance concerns?

**Things to consider:**
1. Preview URLs are only 30 seconds - may not capture full song dynamics
2. Some tracks may not have preview URLs (especially older/regional content)
3. BPM detection can be tricky for complex rhythms or tempo changes
4. Processing time per track (for planning parallel processing)
