#### Articles that helped me get started with Spotify's Web API:
https://towardsdatascience.com/organizing-my-spotify-playlists-with-data-science-9a528110319  
https://morioh.com/a/31b8a607b2b0/how-to-extract-data-using-spotifys-api-python-and-spotipy

#### Spotify audio feature documentation:
https://developer.spotify.com/documentation/web-api/reference/get-audio-features

In [1]:
#import standard library, pandas, and spotipy
import os

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
#in developer dashboard, create app to get client_id and client_secret
#credentials are read from the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment
#variables so real secrets never have to be pasted into (and committed with) the notebook;
#the placeholder strings are only used when those variables are not set
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'CLIENT_SECRET')

#authenticated client shared by every cell below
client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
#extract features of interest given spotify track id
#audio-feature fields copied into each row, in this exact order
_AUDIO_FEATURE_KEYS = (
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
)

def getTrackFeatures(id):
    """Return one flat row of general info and audio features for a track.

    Parameters
    ----------
    id : str
        Spotify track id, passed to both ``sp.track`` and ``sp.audio_features``.

    Returns
    -------
    list
        ``[name, artist, album, release_date, popularity]`` followed by the
        thirteen values named in ``_AUDIO_FEATURE_KEYS`` (18 items in total).
        When the audio-features lookup yields no entry (a ``None``/empty
        result, or a ``None`` first element) the thirteen audio values are
        ``None`` instead of raising ``TypeError``.

    Raises
    ------
    KeyError
        If the track payload, or a present audio-features entry, lacks one of
        the expected fields.
    """
    track = sp.track(id)
    features = sp.audio_features(id)

    # general track info
    # NOTE(review): "artist" is the first artist listed on the *album*, not on
    # the track itself — confirm this is the intended column.
    album = track['album']
    general_info = [
        track['name'],
        album['artists'][0]['name'],
        album['name'],
        album['release_date'],
        track['popularity'],
    ]

    # track audio features; the lookup result is a list whose first element
    # belongs to this track and may be missing
    audio = features[0] if features else None
    if audio is None:
        audio_info = [None] * len(_AUDIO_FEATURE_KEYS)
    else:
        audio_info = [audio[key] for key in _AUDIO_FEATURE_KEYS]

    #return data
    return general_info + audio_info

In [4]:
#extract features from each track in a playlist given a user's username and playlist id
def createPlaylist(username, playlist_id):
    """Collect a feature row for every usable track in a playlist.

    Parameters
    ----------
    username : str
        Forwarded unchanged to ``sp.user_playlist_tracks``.
    playlist_id : str
        Id of the playlist to read.

    Returns
    -------
    list
        One ``getTrackFeatures`` row per track, in playlist order. Items whose
        ``track`` entry is missing/``None`` or whose track has no ``id`` are
        skipped, because they cannot be looked up by id.
    """
    # NOTE(review): newer spotipy releases appear to mark user_playlist_tracks
    # as deprecated in favour of playlist_items — confirm against the
    # installed version before switching.
    page = sp.user_playlist_tracks(username, playlist_id)

    # copy the first page so the response object is not mutated while paging
    items = list(page['items'])
    while page['next']:
        page = sp.next(page)
        items.extend(page['items'])

    track_ids = []
    for item in items:
        track = item.get('track') if item else None
        track_id = track.get('id') if track else None
        if track_id:
            track_ids.append(track_id)

    playlist_features = [getTrackFeatures(track_id) for track_id in track_ids]
    print('Playlist information extracted successfully.')
    return playlist_features

In [5]:
#pull the feature rows for one playlist; the playlist id is found in the playlist's share link (this may take a few minutes)
playlist = createPlaylist(username='Madhan J', playlist_id='3BtXuMAj6SMjqtWwVsuWh0')

Playlist information extracted successfully.


In [6]:
#convert to dataframe (pandas is imported with the other imports at the top of the notebook)
#column labels, in the same order as the values in each extracted row
PLAYLIST_COLUMNS = [
    'name', 'artist', 'album', 'release_date', 'popularity',
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
]
playlist_df = pd.DataFrame(playlist, columns=PLAYLIST_COLUMNS)

#print first few rows of dataframe to confirm everything works properly
playlist_df.head()

Unnamed: 0,name,artist,album,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,A Sky Full of Stars,Coldplay,Ghost Stories,2014-05-19,89,0.545,0.675,6,-6.474,1,0.0279,0.00617,0.00197,0.209,0.162,124.97,267867,4
1,Apple Pie,Travis Scott,Rodeo,2015-09-04,71,0.655,0.496,3,-4.139,0,0.0364,0.0335,0.0,0.156,0.28,137.945,219360,4
2,Still Dreaming,Nas,Hip Hop Is Dead (Expanded Edition),2007-01-23,64,0.547,0.825,7,-8.499,1,0.212,0.105,0.000248,0.14,0.589,119.743,217827,5
3,That Go! (feat. T-Shyne),Young Stoner Life,Slime Language 2,2021-04-16,54,0.713,0.575,1,-6.706,1,0.0501,0.00669,0.0,0.169,0.189,145.95,226400,4
4,THank god 4 me,ScHoolboy Q,BLUE LIPS,2024-03-01,35,0.721,0.566,2,-7.871,1,0.196,0.652,0.0,0.112,0.415,136.017,177261,4


In [7]:
#convert to csv for easy access
#create the output folder first so the export also works when data/ does not exist yet
os.makedirs('data', exist_ok=True)
playlist_df.to_csv('data/playlist_info.csv', index=False)