# Spotify API Call

See here for getting started:
https://developer.spotify.com/documentation/web-api/tutorials/getting-started

In [1]:
import pandas as pd
import requests
import json
import yaml


# Authenticating

In [2]:
with open ('secrets.yml','r') as f:
    secrets = yaml.safe_load(f)

In [4]:
client_id = secrets['client_id']
client_secret = secrets['client_secret']

url = 'https://accounts.spotify.com/api/token'
headers = {'Content-Type': 'application/x-www-form-urlencoded'}

data = {
   'grant_type':'client_credentials',
   'client_id': client_id,
   'client_secret':client_secret
}

auth_req = requests.post(url, headers=headers, data =data)

print("Status Code", auth_req.status_code)
print("JSON Response ", auth_req.json())

access_token = auth_req.json()['access_token']
headers = {'Authorization': f'Bearer {access_token}'}

Status Code 200
JSON Response  {'access_token': 'BQDghMkhTk1xr6bgqXWRc1Yq9vzxWFQRmg8kjhKtZYIiht4EtlilAxXTK-WiMa8ehE08FG6ErzuTfHCcAnjCw7F_TZgnxhyO7BYKZLq1xe31q4TKML0', 'token_type': 'Bearer', 'expires_in': 3600}


# Getting track data from playlist url

In [26]:
playlist_url = 'https://open.spotify.com/playlist/4oYSWmdhUMwEu0yAFA47lZ'
playlist_URI = playlist_url.split('/')[-1].split("?")[0]
playlist_URI

'4oYSWmdhUMwEu0yAFA47lZ'

In [27]:
playlist_endpoint = f'https://api.spotify.com/v1/playlists/{playlist_URI}/tracks'
offset = 0
playlist_pages = []

# Looping as the json is cut off at 100

while True:
    response = requests.get(playlist_endpoint+f'?offset={offset}', headers=headers)
    playlist_total = response.json()['total']
    playlist_pages.append(response.json()['items'])

    offset += 100
    if offset>playlist_total:
        break


print(response.json()['items'][0])
print(f"There are {playlist_total} tracks in this playlist")
print(f"Number of pages in playlist_pages: {len(playlist_pages)}")

{'added_at': '2022-09-06T03:00:48Z', 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/tylerduong81'}, 'href': 'https://api.spotify.com/v1/users/tylerduong81', 'id': 'tylerduong81', 'type': 'user', 'uri': 'spotify:user:tylerduong81'}, 'is_local': False, 'primary_color': None, 'track': {'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6PXS4YHDkKvl1wkIl4V8DL'}, 'href': 'https://api.spotify.com/v1/artists/6PXS4YHDkKvl1wkIl4V8DL', 'id': '6PXS4YHDkKvl1wkIl4V8DL', 'name': 'Fetty Wap', 'type': 'artist', 'uri': 'spotify:artist:6PXS4YHDkKvl1wkIl4V8DL'}], 'available_markets': ['AR', 'BO', 'BR', 'BG', 'CA', 'CL', 'CO', 'CR', 'CZ', 'DO', 'EC', 'EE', 'SV', 'GR', 'GT', 'HN', 'HK', 'HU', 'LV', 'LT', 'MY', 'MT', 'MX', 'NI', 'PA', 'PY', 'PE', 'PH', 'PL', 'SG', 'SK', 'TW', 'TR', 'UY', 'US', 'ID', 'JP', 'TH', 'VN', 'RO', 'IL', 'ZA', 'SA', 'AE', 'BH', 'QA', 'OM', 'KW', 'EG', 'MA', 'DZ', 'TN', 'LB', 'JO', 'PS', 'IN', 'BY', 'K

In [256]:
response.text

'{\n  "href" : "https://api.spotify.com/v1/playlists/4oYSWmdhUMwEu0yAFA47lZ/tracks?offset=200&limit=100",\n  "items" : [ {\n    "added_at" : "2022-09-06T03:00:48Z",\n    "added_by" : {\n      "external_urls" : {\n        "spotify" : "https://open.spotify.com/user/tylerduong81"\n      },\n      "href" : "https://api.spotify.com/v1/users/tylerduong81",\n      "id" : "tylerduong81",\n      "type" : "user",\n      "uri" : "spotify:user:tylerduong81"\n    },\n    "is_local" : false,\n    "primary_color" : null,\n    "track" : {\n      "album" : {\n        "album_type" : "album",\n        "artists" : [ {\n          "external_urls" : {\n            "spotify" : "https://open.spotify.com/artist/6PXS4YHDkKvl1wkIl4V8DL"\n          },\n          "href" : "https://api.spotify.com/v1/artists/6PXS4YHDkKvl1wkIl4V8DL",\n          "id" : "6PXS4YHDkKvl1wkIl4V8DL",\n          "name" : "Fetty Wap",\n          "type" : "artist",\n          "uri" : "spotify:artist:6PXS4YHDkKvl1wkIl4V8DL"\n        } ],\n     

Drill down into one of the pages and extract a single track id

In [257]:
playlist_pages[0][0]['track']['uri']# extracting track URIs from the playlist

'spotify:track:7KPr0YxECy4Q1k2F17Sa0Q'

In [258]:
print(f'the shape of this list of lists is: {len(playlist_pages)} by {len(playlist_pages[0])}')

the shape of this list of lists is: 3 by 100


Extract data for each track by concatenating dataframes then just take the name and id.

In [20]:
df = pd.DataFrame()
for page in playlist_pages:
    df = pd.concat([df,pd.json_normalize(page, sep='_')])
print(df.shape)
df[['track_name','track_id']]

(280, 40)


Unnamed: 0,track_name,track_id
0,Memories (feat. Kid Cudi),7KPr0YxECy4Q1k2F17Sa0Q
1,Heat Waves - Diplo Remix,6A53tLi9vpLhmgaEvzRyip
2,Down On Me,0x3TibRgnSqzkcwrqBd4iX
3,Cooler Than Me - Single Mix,2V4bv1fNWfTcyRJKmej6Sj
4,CC Nike,0SQyJNUgav703ELhoHWq6O
...,...,...
75,Hold You - Hold Yuh,0j55p8YRAPG8yajtNyjt6A
76,The Motto,18asYwWugKjjsihZ0YvRxO
77,Titanium (feat. Sia),77TT8Xvx637TpzV8kKGkUw
78,Somebody To Love Remix,7IUl5c6u18rzmyQOblj10T


In [23]:
df.keys()

Index(['added_at', 'is_local', 'primary_color',
       'added_by_external_urls_spotify', 'added_by_href', 'added_by_id',
       'added_by_type', 'added_by_uri', 'track_album_album_type',
       'track_album_artists', 'track_album_available_markets',
       'track_album_external_urls_spotify', 'track_album_href',
       'track_album_id', 'track_album_images', 'track_album_name',
       'track_album_release_date', 'track_album_release_date_precision',
       'track_album_total_tracks', 'track_album_type', 'track_album_uri',
       'track_artists', 'track_available_markets', 'track_disc_number',
       'track_duration_ms', 'track_episode', 'track_explicit',
       'track_external_ids_isrc', 'track_external_urls_spotify', 'track_href',
       'track_id', 'track_is_local', 'track_name', 'track_popularity',
       'track_preview_url', 'track_track', 'track_track_number', 'track_type',
       'track_uri', 'video_thumbnail_url'],
      dtype='object')

Loop through the track ids and for each track retrive track info and acoustics infor from the the following endpoints:

https://developer.spotify.com/documentation/web-api/reference/get-several-tracks

https://developer.spotify.com/documentation/web-api/reference/get-several-audio-features

We can only send 50 ids at a time, so a staggered loop is required.

In [8]:
headers = {'Authorization': f'Bearer {access_token}'}

track_ids = df['track_id'].to_list()

start = 0
end = len(track_ids)
step = 50 # API only accepts a maximum 50 id values

track_info=[]
track_feature_info = []



for i in range(start, end, step):
    x = i
    id_chunk = ','.join(track_ids[x:x+step])

    track_api_endpoint = f'https://api.spotify.com/v1/tracks?ids={id_chunk}'
    response_track = requests.get(track_api_endpoint, headers=headers)
    track_info.append(response_track.json())

    track_features_endpoint = f'https://api.spotify.com/v1/audio-features?ids={id_chunk}'
    response_feature = requests.get(track_features_endpoint, headers=headers)
    track_feature_info.append(response_feature.json())


    print(id_chunk)



7KPr0YxECy4Q1k2F17Sa0Q,6A53tLi9vpLhmgaEvzRyip,0x3TibRgnSqzkcwrqBd4iX,2V4bv1fNWfTcyRJKmej6Sj,0SQyJNUgav703ELhoHWq6O,0qOnSQQF0yzuPWsXrQ9paz,0Hf4aIJpsN4Os2f0y0VqWl,5TkI9M5QJeeFPcB4xCxNFK,4QNpBfC0zvjKqPJcyqBy9W,2bJvI42r8EF3wxjOuDav4r,62zFEHfAYl5kdHYOivj4BC,20FZrLl9zSbO9b3DhaqO6M,2ynC4LLMrU7ZOuE0D5PGae,0wwPcA6wtMf6HUMpIRdeP7,4NTWZqvfQTlOMitlVn6tew,69yVxyuRahEzs2taFMVVoO,3RWl1dP90FXmZDUeIAO20j,2zJZwWF7BTGIIvrAlgzJEx,7LP4Es66zdY7CyjepqmvAg,5vlEg2fT4cFWAqU5QptIpQ,4Vs3tWESL3flxZETGxNBtc,3JA9Jsuxr4xgHXEawAdCp4,2gam98EZKrF9XuOkU13ApN,0TyOpxlWwDx98bjkIVHUgY,7LR85XLWw2yXqKBSI5brbG,20zQZVyUNPbq8kZACdgYrh,1JO2G7AA66n35fv1T8HIT4,1uN0f4NqMDcReSwXO6PLuG,4TsmezEQVSZNNPv5RJ65Ov,7ksYJ95P5vP87A0GH34CIk,4wTMBYRE6xVTIUQ6fEudsJ,4vp2J1l5RD4gMZwGFLfRAu,6ebkx7Q5tTxrCxKq4GYj0Y,4356Typ82hUiFAynbLYbPn,2mKouqwAIdQnMP43zxR89r,7gDKfEMIhyyO4asgEXwcrs,0uXO2GrNiIb1xHT9LUdxZE,1bM50INir8voAkVoKuvEUI,1DqdF42leyFIzqNDv9CjId,1CdqVF1ywD0ZO1zXtB9yWa,6Rb0ptOEjBjPPQUlQtQGbL,4DvhkX2ic4zWkQeWMwQ2qf,6btyEL6NwUa97Nex9cZFvo,5HQVUIKwCEX

Also, not that the format is id1,id2,ids that is why we do ','.join(list) to make it in the required format to send ot the API end point as id_chunks (chunk of 50 track ids)

In [271]:
type(track_info)

list

Again do something similar to the above with concatenating the dataframes

In [29]:
df2 = pd.DataFrame()
for item in track_info:
    df2 = pd.concat([df2,pd.json_normalize(item['tracks'], sep='_')])
    #print(item)
print(df2.shape)


(280, 29)


In [31]:
df2.head()

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,href,id,is_local,name,popularity,...,album_id,album_images,album_name,album_release_date,album_release_date_precision,album_total_tracks,album_type,album_uri,external_ids_isrc,external_urls_spotify
0,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,210840,False,https://api.spotify.com/v1/tracks/7KPr0YxECy4Q...,7KPr0YxECy4Q1k2F17Sa0Q,False,Memories (feat. Kid Cudi),67,...,2h7IRrDatzXKOlDdmTvKfi,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Memories (feat. Kid Cudi),2010-02-12,day,3,album,spotify:album:2h7IRrDatzXKOlDdmTvKfi,FRZID0900480,https://open.spotify.com/track/7KPr0YxECy4Q1k2...
1,[{'external_urls': {'spotify': 'https://open.s...,[],2,141897,False,https://api.spotify.com/v1/tracks/6A53tLi9vpLh...,6A53tLi9vpLhmgaEvzRyip,False,Heat Waves - Diplo Remix,1,...,0KTj6k94XZh0c6IEMfxeWV,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",Dreamland (+ Bonus Levels),2020-08-06,day,18,album,spotify:album:0KTj6k94XZh0c6IEMfxeWV,GBUM72004216,https://open.spotify.com/track/6A53tLi9vpLhmga...
2,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,228453,False,https://api.spotify.com/v1/tracks/0x3TibRgnSqz...,0x3TibRgnSqzkcwrqBd4iX,False,Down On Me,77,...,3sGj0fBj5EJbewKgCWA1uD,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",All About You,2010-01-01,day,11,album,spotify:album:3sGj0fBj5EJbewKgCWA1uD,USUM71023043,https://open.spotify.com/track/0x3TibRgnSqzkcw...
3,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,213293,False,https://api.spotify.com/v1/tracks/2V4bv1fNWfTc...,2V4bv1fNWfTcyRJKmej6Sj,False,Cooler Than Me - Single Mix,79,...,2nnIlWcriIqcJtjduWcTRl,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",31 Minutes to Takeoff,2010-08-09,day,12,album,spotify:album:2nnIlWcriIqcJtjduWcTRl,USJAY1000035,https://open.spotify.com/track/2V4bv1fNWfTcyRJ...
4,[{'external_urls': {'spotify': 'https://open.s...,"[AR, AU, AT, BE, BO, BR, BG, CA, CL, CO, CR, C...",1,148448,True,https://api.spotify.com/v1/tracks/0SQyJNUgav70...,0SQyJNUgav703ELhoHWq6O,False,CC Nike,30,...,0ecL0PLzMpSy7aYcRhOvRK,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",CC Nike,2023-05-12,day,1,album,spotify:album:0ecL0PLzMpSy7aYcRhOvRK,QZNB82315787,https://open.spotify.com/track/0SQyJNUgav703EL...


In [13]:
df2.columns

Index(['artists', 'available_markets', 'disc_number', 'duration_ms',
       'explicit', 'href', 'id', 'is_local', 'name', 'popularity',
       'preview_url', 'track_number', 'type', 'uri', 'album_album_type',
       'album_artists', 'album_available_markets',
       'album_external_urls_spotify', 'album_href', 'album_id', 'album_images',
       'album_name', 'album_release_date', 'album_release_date_precision',
       'album_total_tracks', 'album_type', 'album_uri', 'external_ids_isrc',
       'external_urls_spotify'],
      dtype='object')

Same as above again, just the audio_features endpoint

In [16]:
df3 = pd.DataFrame()
for item in track_feature_info:
    df3 = pd.concat([df3,pd.json_normalize(item['audio_features'], sep='_')])
df3.shape

(280, 18)

In [17]:
df3

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.546,0.898,8,-3.938,1,0.2320,0.00158,0.000006,0.2290,0.466,130.002,audio_features,7KPr0YxECy4Q1k2F17Sa0Q,spotify:track:7KPr0YxECy4Q1k2F17Sa0Q,https://api.spotify.com/v1/tracks/7KPr0YxECy4Q...,https://api.spotify.com/v1/audio-analysis/7KPr...,210840,4
1,0.568,0.565,11,-7.187,1,0.3380,0.11600,0.000000,0.4280,0.403,137.075,audio_features,6A53tLi9vpLhmgaEvzRyip,spotify:track:6A53tLi9vpLhmgaEvzRyip,https://api.spotify.com/v1/tracks/6A53tLi9vpLh...,https://api.spotify.com/v1/audio-analysis/6A53...,141898,4
2,0.700,0.598,2,-7.783,1,0.1140,0.03690,0.000000,0.1110,0.594,160.041,audio_features,0x3TibRgnSqzkcwrqBd4iX,spotify:track:0x3TibRgnSqzkcwrqBd4iX,https://api.spotify.com/v1/tracks/0x3TibRgnSqz...,https://api.spotify.com/v1/audio-analysis/0x3T...,228453,4
3,0.768,0.820,7,-4.630,0,0.0474,0.17900,0.000000,0.6890,0.625,129.965,audio_features,2V4bv1fNWfTcyRJKmej6Sj,spotify:track:2V4bv1fNWfTcyRJKmej6Sj,https://api.spotify.com/v1/tracks/2V4bv1fNWfTc...,https://api.spotify.com/v1/audio-analysis/2V4b...,213293,4
4,0.777,0.635,4,-6.934,0,0.3150,0.23800,0.000000,0.1810,0.640,96.926,audio_features,0SQyJNUgav703ELhoHWq6O,spotify:track:0SQyJNUgav703ELhoHWq6O,https://api.spotify.com/v1/tracks/0SQyJNUgav70...,https://api.spotify.com/v1/audio-analysis/0SQy...,148448,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,0.690,0.621,8,-3.583,1,0.0600,0.32100,0.000028,0.0884,0.497,66.858,audio_features,0j55p8YRAPG8yajtNyjt6A,spotify:track:0j55p8YRAPG8yajtNyjt6A,https://api.spotify.com/v1/tracks/0j55p8YRAPG8...,https://api.spotify.com/v1/audio-analysis/0j55...,233253,3
26,0.754,0.763,7,-4.627,0,0.0435,0.03010,0.000022,0.0901,0.464,117.953,audio_features,18asYwWugKjjsihZ0YvRxO,spotify:track:18asYwWugKjjsihZ0YvRxO,https://api.spotify.com/v1/tracks/18asYwWugKjj...,https://api.spotify.com/v1/audio-analysis/18as...,164819,4
27,0.604,0.788,0,-3.673,0,0.1030,0.06780,0.153000,0.1270,0.301,126.060,audio_features,77TT8Xvx637TpzV8kKGkUw,spotify:track:77TT8Xvx637TpzV8kKGkUw,https://api.spotify.com/v1/tracks/77TT8Xvx637T...,https://api.spotify.com/v1/audio-analysis/77TT...,245040,4
28,0.703,0.836,5,-4.619,1,0.0283,0.00388,0.000000,0.1600,0.552,129.987,audio_features,7IUl5c6u18rzmyQOblj10T,spotify:track:7IUl5c6u18rzmyQOblj10T,https://api.spotify.com/v1/tracks/7IUl5c6u18rz...,https://api.spotify.com/v1/audio-analysis/7IUl...,220160,4


In [32]:
df3.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'type', 'id', 'uri', 'track_href', 'analysis_url', 'duration_ms',
       'time_signature'],
      dtype='object')

In [38]:
df_final = df2.merge(df3, on = 'id', how = 'left')
df_final.shape

(280, 46)

In [39]:
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 280 entries, 0 to 279
Data columns (total 46 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   artists                       280 non-null    object 
 1   available_markets             280 non-null    object 
 2   disc_number                   280 non-null    int64  
 3   duration_ms_x                 280 non-null    int64  
 4   explicit                      280 non-null    bool   
 5   href                          280 non-null    object 
 6   id                            280 non-null    object 
 7   is_local                      280 non-null    bool   
 8   name                          280 non-null    object 
 9   popularity                    280 non-null    int64  
 10  preview_url                   146 non-null    object 
 11  track_number                  280 non-null    int64  
 12  type_x                        280 non-null    object 
 13  uri_x

In [40]:
df_final.to_csv('playlist_data.csv')