# Obtaining Playlist Information

Using our Spotify client ID and secret, install and initialize Spotipy.

In [1]:
!pip install spotipy
import os

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
# Read the Spotify API credentials from the environment instead of hardcoding
# them, so the client secret is never saved or shared with the notebook.
# Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel;
# a missing variable raises a KeyError naming it.
# NOTE: any secret that was previously committed in this notebook should be
# regenerated in the Spotify developer dashboard.
cid = os.environ['SPOTIPY_CLIENT_ID']
secret = os.environ['SPOTIPY_CLIENT_SECRET']

# Client-credentials flow: application-level access, no user login involved.
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.


We use the function below to extract the track data for a playlist, and then store each track's ID in a list.

In [2]:
def get_playlist_tracks(username,playlist_id):
    """Collect every track entry of a playlist, following all result pages.

    Takes a Spotify user name and a playlist ID (URI) and returns a list of
    dictionaries of track data, one per playlist entry.
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    collected = page['items']

    # A single response holds only one page of items; keep requesting the
    # following page until the API reports there is no 'next' one.
    while True:
        if not page['next']:
            return collected
        page = sp.next(page)
        collected += page['items']

# Fetch every entry (across all result pages) of the Four Tet playlist
all_tracks = get_playlist_tracks('Four Tet', '2uzbATYxs9V8YQi5lf89WG')

# Pull the Spotify track ID out of each playlist entry
all_track_ids = [entry['track']['id'] for entry in all_tracks]
len(all_track_ids)

1749

Now that we have our IDs, we use Spotipy to extract additional data; in this case we will build a DataFrame with descriptive metadata and audio features. Our playlist for modeling will be DJ and producer Four Tet's popular 1700+ song playlist.

In [3]:
def getTrackFeatures(id_list):
    """Build one row of descriptive metadata and audio features for a track.

    Parameters
    ----------
    id_list : str
        A single Spotify track ID. Despite the name, the value is passed
        straight to ``sp.track`` and only the first audio-features entry is
        read, so one call describes one track.

    Returns
    -------
    list
        ``[track_name, album, artist_name, release_date, duration_ms,
        popularity, danceability, acousticness, energy, instrumentalness,
        liveness, loudness, speechiness, tempo, valence]``.
        The nine audio-feature entries are ``None`` when no audio features
        are returned for the track.
    """
    # Order of the audio-feature values in the returned row.
    audio_keys = (
        'danceability', 'acousticness', 'energy', 'instrumentalness',
        'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
    )

    # Look up the metadata and audio features for this track ID
    meta = sp.track(id_list)
    features = sp.audio_features(id_list)

    # metadata
    album_info = meta['album']
    track_name = meta['name']
    album = album_info['name']
    # first artist credited on the album entry
    artist_name = album_info['artists'][0]['name']
    release_date = album_info['release_date']
    popularity = meta['popularity']
    duration_ms = meta['duration_ms']

    # audio features: the entry for a track can be None (or the list empty)
    # when no features are available; emit None placeholders instead of
    # crashing so a long scrape is not aborted by a single track.
    audio = features[0] if features else None
    if audio is None:
        audio_values = [None] * len(audio_keys)
    else:
        audio_values = [audio[key] for key in audio_keys]

    track = [track_name, album, artist_name, release_date, duration_ms, popularity] + audio_values
    return track

In [6]:
# Build one feature row per track ID in the Four Tet playlist
# Note - This cell takes several minutes to run
tracks = [getTrackFeatures(track_id) for track_id in all_track_ids]

# Assemble the rows into a DataFrame and write it out as a csv file
df = pd.DataFrame(
    tracks,
    columns=[
        'track_name', 'album', 'artist_name', 'release_date',
        'duration_ms', 'popularity', 'danceability', 'acousticness',
        'energy', 'instrumentalness', 'liveness', 'loudness',
        'speechiness', 'tempo', 'valence',
    ],
)
df.to_csv("fourtet.csv", sep=',')

Let's build DataFrames for two additional playlists of different sizes and musical styles to further test our model.

In [16]:
# Fetch every entry (across all result pages) of the second playlist
all_tracks_2 = get_playlist_tracks('Spotify', '37i9dQZF1DWSqmBTGDYngZ')

# Pull the Spotify track ID out of each playlist entry
all_track_ids_2 = [entry['track']['id'] for entry in all_tracks_2]
len(all_track_ids_2)

200

In [17]:
# Build one feature row per track ID in the second playlist
tracks_2 = [getTrackFeatures(track_id) for track_id in all_track_ids_2]

# Assemble the rows into a DataFrame and write it out as a csv file
df_2 = pd.DataFrame(
    tracks_2,
    columns=[
        'track_name', 'album', 'artist_name', 'release_date',
        'duration_ms', 'popularity', 'danceability', 'acousticness',
        'energy', 'instrumentalness', 'liveness', 'loudness',
        'speechiness', 'tempo', 'valence',
    ],
)
df_2.to_csv("showersongs.csv", sep=',')

In [24]:
# Fetch every entry (across all result pages) of the Max Richter playlist
all_tracks_3 = get_playlist_tracks('Max Richter', '1KwEaM2Vvz7oVjnn1Phabf')

# Pull the Spotify track ID out of each playlist entry
all_track_ids_3 = [entry['track']['id'] for entry in all_tracks_3]
len(all_track_ids_3)

78

In [25]:
# Build one feature row per track ID in the Max Richter playlist
tracks_3 = [getTrackFeatures(track_id) for track_id in all_track_ids_3]

# Assemble the rows into a DataFrame and write it out as a csv file
df_3 = pd.DataFrame(
    tracks_3,
    columns=[
        'track_name', 'album', 'artist_name', 'release_date',
        'duration_ms', 'popularity', 'danceability', 'acousticness',
        'energy', 'instrumentalness', 'liveness', 'loudness',
        'speechiness', 'tempo', 'valence',
    ],
)
df_3.to_csv("maxrichter.csv", sep=',')