In [17]:
import base64
import os
import time
from typing import List, Optional

import pandas as pd
import requests

# --- CONFIGURATION ---
# Credentials and the target playlist can be overridden through environment
# variables (SPOTIFY_CLIENT_ID, SPOTIFY_CLIENT_SECRET, SPOTIFY_PLAYLIST_ID) so
# that secrets do not have to live in the source file.
# NOTE(security): the literal fallbacks below are credentials committed in
# plain text. They are kept only so existing runs keep working; rotate them
# and drop the defaults once the environment variables are configured.
CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '5f857178968840c6ac260b716ccccb4e')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', '95e7fed22a1849b7b2d9c1880877a2f6')
PLAYLIST_ID = os.environ.get('SPOTIFY_PLAYLIST_ID', '0oD5aLzUQiIomu805F3xDH')

# --- AUTHENTICATION ---
def get_spotify_token(timeout: float = 10.0) -> str:
    """Get a Spotify access token using the Client Credentials flow.

    Sends the module-level ``CLIENT_ID`` and ``CLIENT_SECRET`` as an HTTP
    Basic ``Authorization`` header to the Spotify accounts token endpoint.

    Args:
        timeout: Seconds to wait for the accounts service before giving up.
            Without a timeout ``requests`` would wait indefinitely.

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or a non-2xx response (via ``raise_for_status``).
        KeyError: If the response body has no ``access_token`` key.
    """
    auth_url = 'https://accounts.spotify.com/api/token'
    credentials = f"{CLIENT_ID}:{CLIENT_SECRET}".encode()
    auth_headers = {
        'Authorization': f'Basic {base64.b64encode(credentials).decode()}',
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    response = requests.post(
        auth_url,
        data={'grant_type': 'client_credentials'},
        headers=auth_headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()['access_token']

# --- TEST PLAYLIST ACCESS ---
def test_playlist_access(access_token: str, playlist_id: str,
                         timeout: float = 10.0) -> bool:
    """Check that the playlist can be read and print its basic details.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        playlist_id: Spotify ID of the playlist to look up.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        ``True`` when the playlist was fetched and its name, description and
        track count were printed; ``False`` when the request failed (HTTP
        error status, connection problem or timeout).
    """
    url = f'https://api.spotify.com/v1/playlists/{playlist_id}'
    headers = {'Authorization': f'Bearer {access_token}'}

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        playlist_data = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Failed to access playlist: {e}")
        # Compare against None explicitly: a Response object with a 4xx/5xx
        # status is falsy, so a plain truthiness test would discard it.
        status_code = e.response.status_code if e.response is not None else None
        if status_code == 404:
            print("This usually means:")
            print("1. The playlist ID is incorrect")
            print("2. The playlist is private or doesn't exist")
            print("3. There's a regional restriction")
        if status_code is not None:
            print(f"Status Code: {status_code}")
        return False

    print("Successfully accessed playlist!")
    print(f"Playlist Name: {playlist_data['name']}")
    print(f"Description: {playlist_data.get('description', 'No description')}")
    print(f"Number of Tracks: {playlist_data['tracks']['total']}")
    return True

# --- GET PLAYLIST TRACKS ---
def get_playlist_tracks(access_token: str, playlist_id: str,
                        timeout: float = 10.0) -> List[dict]:
    """Fetch every usable track of a playlist, following pagination.

    Pages through the playlist 50 items at a time. Items whose ``track`` is
    null, that are flagged ``is_local``, or that have no ``id`` are skipped.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        playlist_id: Spotify ID of the playlist to read.
        timeout: Seconds to wait for each page before giving up.

    Returns:
        One dict per track with the keys ``track_name``, ``track_id``,
        ``artist_name`` (all artist names joined with ``", "``),
        ``artist_id`` (first artist only, or ``None``), ``album_name``,
        ``popularity``, ``duration_ms`` and ``explicit``. An empty list is
        returned if any page request fails, even when earlier pages
        succeeded, so callers can treat "empty" as "failed".
    """
    url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {'limit': 50, 'offset': 0}
    all_tracks = []

    while True:
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching playlist tracks: {e}")
            # The exception carries the response (when there is one); compare
            # against None because an error Response object is falsy.
            if e.response is not None:
                print(f"Status Code: {e.response.status_code}")
            return []

        for item in data['items']:
            track_info = item.get('track')
            if not track_info:
                continue
            if track_info.get('is_local', False) or not track_info.get('id'):
                continue

            artists = track_info['artists']
            all_tracks.append({
                'track_name': track_info['name'],
                'track_id': track_info['id'],
                'artist_name': ', '.join(artist['name'] for artist in artists),
                'artist_id': artists[0]['id'] if artists else None,
                'album_name': track_info['album']['name'],
                'popularity': track_info['popularity'],
                'duration_ms': track_info['duration_ms'],
                'explicit': track_info['explicit'],
            })

        # A null/absent "next" link marks the last page.
        if not data.get('next'):
            break
        params['offset'] += params['limit']
        time.sleep(0.1)  # small pause between pages to be gentle on the API

    return all_tracks

# --- GET AUDIO FEATURES ---
def get_audio_features(access_token: str, track_ids: List[str],
                       timeout: float = 10.0) -> List[dict]:
    """Fetch audio features for the given tracks in chunks of 100 IDs.

    Falsy and non-string IDs are dropped first. Each chunk is requested
    separately with a fixed 0.2 s pause after a successful chunk; there is
    no retry or ``Retry-After`` handling. A failed chunk is reported and
    skipped, so the result may be partial.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        track_ids: Track IDs to look up.
        timeout: Seconds to wait for each chunk before giving up.

    Returns:
        The non-null entries of every successful response's
        ``audio_features`` array, in request order. Empty if nothing could
        be fetched.
    """
    url = 'https://api.spotify.com/v1/audio-features'
    headers = {'Authorization': f'Bearer {access_token}'}
    all_audio_features = []

    valid_track_ids = [tid for tid in track_ids if tid and isinstance(tid, str)]
    print(f"Fetching audio features for {len(valid_track_ids)} valid track IDs...")

    total_chunks = (len(valid_track_ids) + 99) // 100
    for chunk_number, start in enumerate(range(0, len(valid_track_ids), 100), start=1):
        chunk = valid_track_ids[start:start + 100]

        try:
            # Let requests build and encode the query string.
            response = requests.get(
                url, headers=headers, params={'ids': ','.join(chunk)}, timeout=timeout
            )
            response.raise_for_status()
            features_data = response.json().get('audio_features') or []
        except requests.exceptions.RequestException as e:
            print(f"Warning: Failed to get audio features for chunk: {e}")
            status_code = e.response.status_code if e.response is not None else None
            if status_code in (401, 403):
                # The token is not accepted for this endpoint, so every
                # remaining chunk would fail the same way.
                # NOTE(review): Spotify has restricted this endpoint for
                # some applications - confirm against the current API docs.
                print("Audio features are not accessible with this token; skipping remaining chunks")
                break
            continue

        # The API reports unknown IDs as null entries; drop them.
        all_audio_features.extend(f for f in features_data if f is not None)

        print(f"Processed chunk {chunk_number}/{total_chunks}")
        time.sleep(0.2)

    return all_audio_features

# --- GET ARTIST INFO ---
def get_artists_info(access_token: str, artist_ids: List[str],
                     timeout: float = 10.0) -> List[dict]:
    """Fetch artist objects (including genres) in chunks of 50 IDs.

    Falsy IDs are dropped and duplicates removed while keeping first-seen
    order, so repeated runs request the same chunks in the same order. A
    failed chunk is reported and skipped, so the result may be partial.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        artist_ids: Artist IDs to look up; may contain duplicates or ``None``.
        timeout: Seconds to wait for each chunk before giving up.

    Returns:
        The non-null entries of every successful response's ``artists``
        array.
    """
    url = 'https://api.spotify.com/v1/artists'
    headers = {'Authorization': f'Bearer {access_token}'}
    all_artists_info = []

    # dict.fromkeys de-duplicates like set() but preserves insertion order.
    unique_artist_ids = list(dict.fromkeys(aid for aid in artist_ids if aid))

    for start in range(0, len(unique_artist_ids), 50):
        chunk = unique_artist_ids[start:start + 50]

        try:
            response = requests.get(
                url, headers=headers, params={'ids': ','.join(chunk)}, timeout=timeout
            )
            response.raise_for_status()
            artists_data = response.json().get('artists') or []
        except requests.exceptions.RequestException as e:
            print(f"Warning: Failed to get artist info for chunk: {e}")
            continue

        # Null entries would break DataFrame construction downstream, so
        # filter them the same way the audio-features fetch does.
        all_artists_info.extend(artist for artist in artists_data if artist is not None)
        time.sleep(0.1)

    return all_artists_info

# --- MAIN EXECUTION ---
def main():
    """Export a playlist's tracks, audio features and artist genres to CSV.

    Steps: obtain a token, verify the playlist is readable, fetch its
    tracks, left-join audio features on ``track_id``, left-join artist
    genres on ``artist_id``, print a preview, write
    ``spotify_playlist_data.csv`` and print mean values of selected audio
    features when they are present. Returns early (after printing a
    message) if the token, the playlist check or the track fetch fails.
    """
    print("1. Getting access token...")
    try:
        token = get_spotify_token()
    except Exception as e:  # top-level boundary: report and stop, no traceback
        print(f"Failed to get access token: {e}")
        return
    print("Successfully obtained access token")

    print("2. Testing playlist access...")
    if not test_playlist_access(token, PLAYLIST_ID):
        return

    print("3. Fetching playlist tracks...")
    tracks_list = get_playlist_tracks(token, PLAYLIST_ID)
    if not tracks_list:
        print("No tracks were fetched")
        return

    tracks_df = pd.DataFrame(tracks_list)
    print(f"Successfully fetched {len(tracks_df)} tracks")

    print("4. Fetching audio features...")
    audio_features = get_audio_features(token, tracks_df['track_id'].tolist())

    if not audio_features:
        print("No audio features were fetched")
        final_df = tracks_df
    else:
        audio_features_df = pd.DataFrame(audio_features).rename(columns={'id': 'track_id'})
        # A playlist can list the same track more than once; keep one feature
        # row per track so the left join cannot multiply playlist rows.
        audio_features_df = audio_features_df.drop_duplicates(subset='track_id')
        # Drop feature columns that the track table already has (for example
        # duration_ms), otherwise the merge would emit *_x / *_y duplicates.
        shared_columns = [
            col for col in audio_features_df.columns
            if col != 'track_id' and col in tracks_df.columns
        ]
        audio_features_df = audio_features_df.drop(columns=shared_columns)

        final_df = tracks_df.merge(audio_features_df, on='track_id', how='left')
        print(f"Successfully merged {len(audio_features_df)} audio feature records")

    print("5. Fetching artist info...")
    artist_ids = final_df['artist_id'].dropna().tolist()
    if artist_ids:
        # Ignore null entries so the DataFrame is built from dicts only.
        artists_info = [artist for artist in get_artists_info(token, artist_ids) if artist]
        if artists_info:
            artists_genres_df = (
                pd.DataFrame(artists_info)[['id', 'genres']]
                .rename(columns={'id': 'artist_id', 'genres': 'artist_genres'})
                .drop_duplicates(subset='artist_id')
            )
            final_df = final_df.merge(artists_genres_df, on='artist_id', how='left')
            print(f"Added genre info for {len(artists_info)} artists")

    print("6. Final data preview:")
    print(final_df.head(3))
    print(f"Dataset shape: {final_df.shape}")
    print(f"Columns available: {list(final_df.columns)}")

    final_df.to_csv('spotify_playlist_data.csv', index=False)
    print("7. Data saved to 'spotify_playlist_data.csv'")

    if 'danceability' in final_df.columns:
        print("8. Audio Features Summary:")
        audio_cols = ['danceability', 'energy', 'valence', 'tempo', 'acousticness']
        for col in audio_cols:
            if col in final_df.columns:
                print(f"{col}: {final_df[col].mean():.3f} (avg)")

# Run the export only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()

# Prints the repr of a literal string that spells out the guard line above.
# NOTE(review): this has no effect on the export and looks like a leftover
# debugging statement - confirm before removing.
print(repr('if __name__ == "__main__":'))

1. Getting access token...
Successfully obtained access token
2. Testing playlist access...
Successfully accessed playlist!
Playlist Name: Dreampop & Shoegaze
Description: 
Number of Tracks: 575
3. Fetching playlist tracks...
Successfully fetched 575 tracks
4. Fetching audio features...
Fetching audio features for 575 valid track IDs...
No audio features were fetched
5. Fetching artist info...
Added genre info for 287 artists
6. Final data preview:
               track_name                track_id       artist_name  \
0              Wildflower  24bzJwjGBWtGCs7xKIKQsB       Beach House   
1  Perfectly Out Of Place  5jtbhF7zGZR4YRMxhLV4V8  Dreams We've Had   
2        After the Moment  01nIMkXQLxBmDKDtQCimM6      Craft Spells   

                artist_id          album_name  popularity  duration_ms  \
0  56ZTgzPBDge0OvCGgMO3OY   Depression Cherry          66       219453   
1  5LkgVE79AHNZN7kMukfJFv  Everything And You          41       293454   
2  16lg8y62dC3J6ok1pNAbFJ    After the M

In [None]:
# Prints the repr of a literal string; it calls nothing else in this file.
# NOTE(review): looks like a scratch/debugging cell - confirm it is still needed.
print(repr('if __name__ == "__main__":'))

'if __name__ == "__main__":'
