In [6]:
# Approach

# 1. Decide on relevant playlists
# 2. Write function to get all artist names from the list
# 3. Write function to get all song names and ids from artists
# 4. Get audio features for relevant songs from spotify API
# 5. Merge dataframes to have holistic data set

In [7]:
# import relevant libraries

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from tqdm.notebook import tqdm
import requests
import getpass

In [8]:
# initialize SpotiPy

client_id= getpass.getpass()
client_secret=getpass.getpass()

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id, client_secret=client_secret))

········
········


### 1. Get a Playlist

In [9]:
#https://open.spotify.com/playlist/0weizyV5WNZP3tvfXWVfmg
playlist = sp.user_playlist("Felix", "0weizyV5WNZP3tvfXWVfmg")

#https://open.spotify.com/playlist/0weizyV5WNZP3tvfXWVfmg?si=071fa0b4cdec4bf3

In [10]:
playlist

{'collaborative': False,
 'description': 'All classic and well known songs in one playlist! Enjoy! Don´t miss my other playlists!',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/0weizyV5WNZP3tvfXWVfmg'},
 'followers': {'href': None, 'total': 64206},
 'href': 'https://api.spotify.com/v1/playlists/0weizyV5WNZP3tvfXWVfmg?additional_types=track',
 'id': '0weizyV5WNZP3tvfXWVfmg',
 'images': [{'height': None,
   'url': 'https://i.scdn.co/image/ab67706c0000bebbcf9adca05f91d49c6eb46792',
   'width': None}],
 'name': 'Hip Hop Classics',
 'owner': {'display_name': 'Felix',
  'external_urls': {'spotify': 'https://open.spotify.com/user/kderqubwka59pl3fms356iw07'},
  'href': 'https://api.spotify.com/v1/users/kderqubwka59pl3fms356iw07',
  'id': 'kderqubwka59pl3fms356iw07',
  'type': 'user',
  'uri': 'spotify:user:kderqubwka59pl3fms356iw07'},
 'primary_color': None,
 'public': True,
 'snapshot_id': 'MzQxLGVkOTNiYTFkZjI5MDRlZGQ1OTg5Y2M4M2RkOWQ0ODdkOTQwOWU1ZDg=',
 'tracks': {'href': 

### 2. Explore features of the playlist

In [11]:
playlist['tracks'].keys()

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [12]:
playlist['tracks']['items']

[{'added_at': '2021-06-18T11:27:04Z',
  'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/kderqubwka59pl3fms356iw07'},
   'href': 'https://api.spotify.com/v1/users/kderqubwka59pl3fms356iw07',
   'id': 'kderqubwka59pl3fms356iw07',
   'type': 'user',
   'uri': 'spotify:user:kderqubwka59pl3fms356iw07'},
  'is_local': False,
  'primary_color': None,
  'track': {'album': {'album_type': 'album',
    'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/09hVIj6vWgoCDtT03h8ZCa'},
      'href': 'https://api.spotify.com/v1/artists/09hVIj6vWgoCDtT03h8ZCa',
      'id': '09hVIj6vWgoCDtT03h8ZCa',
      'name': 'A Tribe Called Quest',
      'type': 'artist',
      'uri': 'spotify:artist:09hVIj6vWgoCDtT03h8ZCa'}],
    'available_markets': ['AD',
     'AE',
     'AG',
     'AL',
     'AM',
     'AO',
     'AR',
     'AT',
     'AU',
     'AZ',
     'BA',
     'BB',
     'BD',
     'BE',
     'BF',
     'BG',
     'BH',
     'BI',
     'BJ',
     'BN',
     'B

In [13]:
# get artists name
playlist['tracks']['items'][0]['track']['artists'][0]['name']

'A Tribe Called Quest'

In [14]:
# get track names

playlist['tracks']['items'][0]['track']['name']

'Scenario'

In [15]:
# get uris
playlist['tracks']['items'][0]['track']['uri']

'spotify:track:0301nLjG0ti26rx5ZmfqtP'

### 3. Realize that you should have used the while loop from the get-go and define a function

In [16]:
def get_playlist_tracks(username,playlist_id):
    playlist = sp.user_playlist_tracks(username,playlist_id)
    tracks = playlist['items']
    while playlist['next']:
        playlist = sp.next(playlist)
        tracks.extend(playlist['items'])
    return tracks

In [17]:
playlist = get_playlist_tracks("Felix", "0weizyV5WNZP3tvfXWVfmg")

In [18]:
playlist[0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

In [19]:
len(playlist)

176

In [20]:
# find artist
playlist[0]['track']['artists'][0]['name']

'A Tribe Called Quest'

In [21]:
for item in playlist:
    print(item['track']['artists'][0]['name'])

A Tribe Called Quest
Kelis
Rilès
Boys Noize
G-Unit
Westside Connection
50 Cent
Dr. Dre
Jeremih
A Tribe Called Quest
Dr. Dre
DMX
Ice Cube
Luniz
Nelly
The Notorious B.I.G.
DMX
Kanye West
Drake
Eve
Kendrick Lamar
Eminem
Big Sean
Diddy
JAY-Z
Fabolous
DaBaby
Kanye West
Lil Wayne
Grits
Lil Wayne
Lil Baby
Tech N9ne
Coolio
Rakim
Eminem
Eminem
Kanye West
Tory Lanez
Diddy
Dead Prez
Kanye West
Westside Connection
The Notorious B.I.G.
Bow Wow
Post Malone
Kanye West
2Pac
Busta Rhymes
Ice Cube
T-Pain
Method Man
Ying Yang Twins
Dr. Dre
50 Cent
Ludacris
Snoop Dogg
Tyga
Missy Elliott
Dr. Dre
Eminem
50 Cent
Ciara
D12
Destiny's Child
The Game
Snoop Dogg
Mariah Carey
Eminem
R. Kelly
Ice Cube
Tyga
Waka Flocka Flame
50 Cent
Eminem
Snoop Dogg
Ciara
Kanye West
DMX
Xzibit
The Notorious B.I.G.
Lil Tecca
DMX
Eminem
The Game
B.o.B
House Of Pain
50 Cent
Kendrick Lamar
Lloyd Banks
Jim Jones
DMX
Houston
Eminem
Fat Joe
Jibbs
Cali Swag District
Ying Yang Twins
Dr. Dre
Ice Cube
Kanye West
The Notorious B.I.G.
Missy Ell

In [22]:
# find song name
playlist[0]['track']['name']

'Scenario'

In [23]:
# find uri
playlist[0]['track']['uri']

'spotify:track:0301nLjG0ti26rx5ZmfqtP'

In [24]:
playlist[0]['track']['artists']

[{'external_urls': {'spotify': 'https://open.spotify.com/artist/09hVIj6vWgoCDtT03h8ZCa'},
  'href': 'https://api.spotify.com/v1/artists/09hVIj6vWgoCDtT03h8ZCa',
  'id': '09hVIj6vWgoCDtT03h8ZCa',
  'name': 'A Tribe Called Quest',
  'type': 'artist',
  'uri': 'spotify:artist:09hVIj6vWgoCDtT03h8ZCa'}]

In [25]:
def get_all_songs(playlist):
    return [item['track']['name'] for item in playlist]

In [26]:
all_songs_playlist = get_all_songs(playlist)

In [27]:
def get_all_uris(playlist):
    return [item['track']['uri'] for item in playlist]

In [28]:
all_uris_playlist = get_all_uris(playlist)

In [29]:
# we will stick to the first artist of each song to reduce complexity

def get_all_artists(playlist):
    return [item['track']['artists'][0]['name'] for item in playlist]

In [30]:
all_artists = get_all_artists(playlist)

### 4. Get all albums of all the artists

In [31]:
def get_all_artist_ids(playlist):
    return [item['track']['artists'][0]['id'] for item in playlist]

In [32]:
artist_ids = get_all_artist_ids(playlist)

In [33]:
len(artist_ids)

176

In [34]:
def get_all_albums(artist_ids):
    allalbums = []
    
    for artist in artist_ids:
        album_list = sp.artist_albums(artist, album_type = 'album', limit = 20)
        
        albums = album_list['items']
    
        while album_list['next']:
            album_list = sp.next(album_list)
            allalbums.extend(album_list['items'])
    return allalbums

In [35]:
all_albums = get_all_albums(artist_ids)

In [36]:
len(all_albums)

1381

In [37]:
all_albums[0].keys()

dict_keys(['album_group', 'album_type', 'artists', 'available_markets', 'external_urls', 'href', 'id', 'images', 'name', 'release_date', 'release_date_precision', 'total_tracks', 'type', 'uri'])

In [47]:
def get_all_album_ids(all_albums):
    return [album['id'] for album in all_albums]

In [48]:
album_ids = get_all_album_ids(all_albums)

In [49]:
album_ids

['4ycNE7y1rp5215g1kkqk1P',
 '75Zkl39tT79VRpfLzuttxS',
 '52ie13EXbuGv1YhEY3BpdI',
 '4YRUsdB3lJ53NmOp0uILQ4',
 '5UYiEB3cGXGihBe7Wvp6dX',
 '2xIhksIizs6gWdRBYdiTLc',
 '61z8ltRVx7p4ZpQXWk4U29',
 '3dNsHWAS8t7AiAqj9ofG5T',
 '6TulrtXfRytg9FCUC4wTuE',
 '1Asg9LRyf3IBmTUKBjmsEW',
 '3WZoJdkb9NSvZqkUShSKv4',
 '7JlCbNWZszAdVKZdtHFQ5p',
 '21JMRGrW8P50U4005RiFGy',
 '71HM1CMYWeZzws8pyiEn46',
 '5EJHJqWoiBXTL6ZQxJBGSc',
 '7oeFV70h2fDapspboicRP5',
 '09TlsckA5lUqWWuIOSnGbw',
 '3AI5kAUjgNtZBwFRi6opDc',
 '6Q2cv2xqzNdf3Rex5sd0HC',
 '17T8wyeiIcfNg7wYfSSqZq',
 '2DoO4TaM7fasqEuad0A8fK',
 '4LrUT8Vg5hmIPM61zGT3ej',
 '6tH2KbxjKSHD5q7kqMsE8C',
 '0xn5YrY8D84KmE4viWaVHD',
 '2HOe7ExqoZcdXPWfMlODR3',
 '2AIYOPRTJxvFkjZ3Uu9L88',
 '07r7KrppFUq72j7nEznjlo',
 '7abEUx9q7vXJLSVLl8FgYu',
 '0JNE1tQLa9JM8E877WIQgR',
 '7xt2MWlSFHNP2NcHWE4W3b',
 '5E2vrvNXeEnbFI7Ym9c9nv',
 '1YUOBZgc3gtvNib8MCSvo1',
 '6WJsuS9yAcz1Mlorxxw6wJ',
 '3cSUd7H22DtZzvSUJkBIlM',
 '1kh5XByhI0DDWV7m9kddr0',
 '4HUUHHXBXImwksfbSPqE7q',
 '7kUJA0mPf26wNsxXy9KgrE',
 

### 5. Get all tracks of all albums

In [142]:
def get_all_tracks(album_ids):
    all_tracks = []
    for album_id in album_ids:
        track_list = sp.album_tracks(album_id)
        #print(track_list)
        all_tracks_album = track_list['items']
        
        while track_list['next']:
            track_list = sp.next(track_list)
            all_tracks_album.extend(track_list['items'])
        all_tracks.extend(all_tracks_album)
    return all_tracks

In [143]:
all_songs = get_all_tracks(album_ids)

In [144]:
len(all_songs)

24659

In [341]:
all_songs[0]['artists'][0]['name']

'50 Cent'

In [289]:
songs_clean_new = [dct for dct in all_songs if dct]

In [345]:
songs_clean_new[0]['artists'][0]['name']

'50 Cent'

In [187]:
def get_all_track_names(all_songs):
    return [track['id'] for track in all_songs]

In [189]:
all_ids = get_all_track_names(all_songs)

In [192]:
len(all_ids)

24659

In [194]:
all_ids[0]

'1u6KuLpfQR8EfAteIl3ej0'

In [250]:
def get_all_track_artists(all_songs):
    return [track['artists'] for track in all_songs]

In [252]:
all_artists = get_all_track_artists(all_songs)

In [253]:
artist_clean = [item for sublist in all_artists for item in sublist]

In [257]:
def get_all_track_artist_names(all_songs):
    return [artist['name'] for artist in artist_clean]

In [258]:
artist_name = get_all_track_artist_names(artist_clean)

In [260]:
def get_all_track_artist_ids(all_songs):
    return [artist['id'] for artist in artist_clean]

In [261]:
artist_id = get_all_track_artist_ids(artist_name)

In [262]:
artist_id

['3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '7dGJo4pcD2V6oG8kP0tJRR',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '4pr7J7wzgObkE3DD3Izi7q',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '50UMTib697JJ44le8DO2Va',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '1Oa0bMld0A3u5OTYfMzp5h',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3vDUJHQtqT3jFRZ2ECXDTi',
 '7dGJo4pcD2V6oG8kP0tJRR',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '7dGJo4pcD2V6oG8kP0tJRR',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 '4pr7J7wzgObkE3DD3Izi7q',
 '3q7HBObVc0L8jNeTe5Gofh',
 '3q7HBObVc0L8jNeTe5Gofh',
 

### 6. Get audio features for all tracks

In [172]:
sp.audio_features(all_uris[5])

[{'danceability': 0.663,
  'energy': 0.911,
  'key': 6,
  'loudness': -1.792,
  'mode': 1,
  'speechiness': 0.269,
  'acousticness': 0.0273,
  'instrumentalness': 0,
  'liveness': 0.0769,
  'valence': 0.562,
  'tempo': 87.3,
  'type': 'audio_features',
  'id': '7nFlzPTW9WGNJ2y9t3Tfvw',
  'uri': 'spotify:track:7nFlzPTW9WGNJ2y9t3Tfvw',
  'track_href': 'https://api.spotify.com/v1/tracks/7nFlzPTW9WGNJ2y9t3Tfvw',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7nFlzPTW9WGNJ2y9t3Tfvw',
  'duration_ms': 269373,
  'time_signature': 4}]

In [202]:
def get_audio_features(all_uris):
    from time import sleep
    audio_feats = []
    
    for id_ in tqdm(all_ids):
        audio_feats.append(sp.audio_features(id_))
        sleep(0.1)
    return audio_feats

In [153]:
"""def get_all_features(all_uris):
    allfeatures = []
    
    for artist in artist_ids:
        audio_feat = sp.audio_features(tracks = all_uris)
        
        audio_feat_list = album_feat['items']
    
        while audio_feat['next']:
            audio_feat = sp.next(audio_feat)
            allfeatures.extend(audio_feat['items'])
    return allfeatures"""

In [203]:
audio_feat = get_audio_features(all_ids)

  0%|          | 0/24659 [00:00<?, ?it/s]

In [204]:
len(audio_feat)

24659

In [216]:
len(audio_feat[0])

1

In [218]:
audio_feat_clean = [item for sublist in audio_feat for item in sublist]

In [225]:
audio_feat_clean_new = [dct for dct in audio_feat_clean if dct]

### 7. Create holistic dataframe

In [226]:
audio_feat_df=pd.DataFrame(audio_feat_clean_new)

In [331]:
len(audio_feat_df['id'].unique())

10638

In [228]:
audio_feat_df.to_csv('audio_feat.csv')

In [295]:
tracks_new = [dct for dct in all_songs if dct]

In [296]:
tracks_df = pd.DataFrame(tracks_new)

In [324]:
tracks_df.head()

Unnamed: 0,artists,available_markets,disc_number,duration_ms,explicit,external_urls,href,id,is_local,name,preview_url,track_number,type,uri
0,[{'external_urls': {'spotify': 'https://open.s...,"[AU, CA, CR, DO, GT, HN, JP, MX, NI, PA, SV, US]",1,6453,False,{'spotify': 'https://open.spotify.com/track/1u...,https://api.spotify.com/v1/tracks/1u6KuLpfQR8E...,1u6KuLpfQR8EfAteIl3ej0,False,Intro,,1,track,spotify:track:1u6KuLpfQR8EfAteIl3ej0
1,[{'external_urls': {'spotify': 'https://open.s...,"[AU, CA, CR, DO, GT, HN, JP, MX, NI, PA, SV, US]",1,179466,True,{'spotify': 'https://open.spotify.com/track/1C...,https://api.spotify.com/v1/tracks/1CYyIsAaTrnm...,1CYyIsAaTrnmJ8MO2rec5f,False,What Up Gangsta,,2,track,spotify:track:1CYyIsAaTrnmJ8MO2rec5f
2,[{'external_urls': {'spotify': 'https://open.s...,"[AU, CA, CR, DO, GT, HN, JP, MX, NI, PA, SV, US]",1,288880,True,{'spotify': 'https://open.spotify.com/track/3O...,https://api.spotify.com/v1/tracks/3ORfa5ilEthp...,3ORfa5ilEthp2U0TRcv7kv,False,Patiently Waiting,,3,track,spotify:track:3ORfa5ilEthp2U0TRcv7kv
3,[{'external_urls': {'spotify': 'https://open.s...,"[AU, CA, CR, DO, GT, HN, JP, MX, NI, PA, SV, US]",1,256226,True,{'spotify': 'https://open.spotify.com/track/2I...,https://api.spotify.com/v1/tracks/2I9foKseoFQh...,2I9foKseoFQh07p6sD2voE,False,Many Men (Wish Death),,4,track,spotify:track:2I9foKseoFQh07p6sD2voE
4,[{'external_urls': {'spotify': 'https://open.s...,"[AU, CA, CR, DO, GT, HN, JP, MX, NI, PA, SV, US]",1,193466,True,{'spotify': 'https://open.spotify.com/track/4R...,https://api.spotify.com/v1/tracks/4RY96Asd9Ief...,4RY96Asd9IefaL3X4LOLZ8,False,In Da Club,,5,track,spotify:track:4RY96Asd9IefaL3X4LOLZ8


In [297]:
tracks_df_clean = tracks_df.drop(columns = ['is_local','preview_url','track_number','type', 'artists','available_markets', 'disc_number', 'external_urls'])

In [312]:
tracks_df_clean = tracks_df_clean.drop(columns = ['href'])

In [330]:
len(tracks_df_clean['id'].unique())

10639

In [319]:
tracks_complete = tracks_df_clean.set_index('id').join(audio_feat_df.set_index('id'), on = "id", how = 'inner')

In [320]:
tracks_complete.shape

(116512, 18)

In [328]:
len(tracks_complete['id'].unique())

10638

In [333]:
tracks_complete = tracks_complete.drop_duplicates(subset = ['id'])

In [349]:
tracks_complete = tracks_complete.drop(columns = ['type', 'analysis_url','time_signature','uri','track_href'])

In [351]:
tracks_complete.reset_index(drop=True)

Unnamed: 0,id,name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,1u6KuLpfQR8EfAteIl3ej0,Intro,0.000,0.385,3,-18.509,0,0.0000,0.238000,0.97800,0.0000,0.000,0.000,6453
1,1CYyIsAaTrnmJ8MO2rec5f,What Up Gangsta,0.680,0.767,11,-2.387,0,0.0397,0.002130,0.00000,0.0996,0.344,82.518,179467
2,3ORfa5ilEthp2U0TRcv7kv,Patiently Waiting,0.538,0.675,0,-2.300,1,0.3350,0.245000,0.00000,0.3620,0.519,78.985,288880
3,2I9foKseoFQh07p6sD2voE,Many Men (Wish Death),0.653,0.826,6,-2.930,0,0.3540,0.015300,0.00146,0.1310,0.416,108.520,256227
4,4RY96Asd9IefaL3X4LOLZ8,In Da Club,0.902,0.720,6,-2.776,0,0.3470,0.260000,0.00000,0.0749,0.805,90.059,193467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10633,0nbhwvRYCcLWPBCFdLv6Ov,Boom - Instrumental Version,0.793,0.354,11,-7.090,0,0.0489,0.000031,0.91000,0.0630,0.965,93.989,235707
10634,1AzES5aftgTTnAU9lXWOOX,Boom - Acappella Version,0.646,0.804,9,-9.206,1,0.5340,0.261000,0.00000,0.6530,0.727,87.446,187160
10635,3RJfq8ZB1H1VVBINtLZ3RQ,Soldier's Story - Street Version,0.798,0.887,6,-5.823,0,0.1950,0.024400,0.00000,0.2990,0.753,96.026,235800
10636,2rXsJCcwitoX864wcqE8XY,Soldier's Story - Radio Version,0.807,0.890,6,-5.862,0,0.1970,0.021500,0.00000,0.2400,0.717,96.024,237507


In [352]:
tracks_complete.to_csv('tracks.csv')