                                Expanding the collection of songs and adding audio features

In [1]:
import pandas as pd
from pandas import json_normalize

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from random import randint
from time import sleep

In [2]:
pd.set_option("display.max_columns", 0)

* Connecting to Spotify

In [3]:
secrets_file = open(".secret.txt","r")

In [4]:
string = secrets_file.read()

In [5]:
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

In [6]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

* Getting top 10 000 songs playlist

In [7]:
def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results)
        tracks = tracks + results['items']
        sleep(randint(1,3000)/1000) # respectful nap
    return tracks

In [8]:
all_tracks = get_playlist_tracks("3BsxEMRSmNfSx6etRrg06y")
len(all_tracks)

9866

In [9]:
tracks = json_normalize(all_tracks)
tracks

* Extracting all the artists for each songs

In [10]:
artists_df = pd.DataFrame(columns=['href', 'id', 'name', 'type', 'uri', 'external_urls.spotify','song_id', 'song_name', 'popularity' ])
for i in tracks.index:
    artists_for_song = json_normalize(tracks.iloc[i]['track.artists'])
    artists_for_song['song_id']    = tracks.iloc[i]['track.id']         # we want to keep song_id, it is the sae for all artists
    artists_for_song['song_name']  = tracks.iloc[i]['track.name']       # we want to keep song_name, it is the sae for all artists
    artists_for_song['popularity'] = tracks.iloc[i]['track.popularity'] # same for popularity   
    artists_df = pd.concat([artists_df, artists_for_song], axis=0)

In [11]:
artists_df.head()

Unnamed: 0,href,id,name,type,uri,external_urls.spotify,song_id,song_name,popularity
0,https://api.spotify.com/v1/artists/2uH0RyPcX7f...,2uH0RyPcX7fnCcT90HFDQX,Manic Street Preachers,artist,spotify:artist:2uH0RyPcX7fnCcT90HFDQX,https://open.spotify.com/artist/2uH0RyPcX7fnCc...,0uJyE3M3ecGZgzltsQKs3a,A Design for Life - Remastered,52
0,https://api.spotify.com/v1/artists/7v4imS0moSy...,7v4imS0moSyGdXyLgVTIV7,Nat King Cole,artist,spotify:artist:7v4imS0moSyGdXyLgVTIV7,https://open.spotify.com/artist/7v4imS0moSyGdX...,2WMyu5IYgxEuCd6xgFgJrl,Nature Boy,58
0,https://api.spotify.com/v1/artists/1dfeR4HaWDb...,1dfeR4HaWDbWqFHLkxsg1d,Queen,artist,spotify:artist:1dfeR4HaWDbWqFHLkxsg1d,https://open.spotify.com/artist/1dfeR4HaWDbWqF...,1AhDOtG9vPSOmsWgNW0BEY,Bohemian Rhapsody - Remastered 2011,0
0,https://api.spotify.com/v1/artists/6aq8T2Rcspx...,6aq8T2RcspxVOGgMrTzjWc,Soft Cell,artist,spotify:artist:6aq8T2RcspxVOGgMrTzjWc,https://open.spotify.com/artist/6aq8T2RcspxVOG...,3HnqpxXpizCRADKCU78cLk,Tainted Love,0
0,https://api.spotify.com/v1/artists/4E9w0bms6Hc...,4E9w0bms6HcEppFlWjeW2d,Bobbie Gentry,artist,spotify:artist:4E9w0bms6HcEppFlWjeW2d,https://open.spotify.com/artist/4E9w0bms6HcEpp...,3ZsLNrysR0UFbjcaIEcDTj,Ode To Billie Joe,10


In [12]:
df_final = artists_df[['song_name', 'name', 'song_id', 'popularity']].reset_index(drop=True)
df_final

Unnamed: 0,song_name,name,song_id,popularity
0,A Design for Life - Remastered,Manic Street Preachers,0uJyE3M3ecGZgzltsQKs3a,52
1,Nature Boy,Nat King Cole,2WMyu5IYgxEuCd6xgFgJrl,58
2,Bohemian Rhapsody - Remastered 2011,Queen,1AhDOtG9vPSOmsWgNW0BEY,0
3,Tainted Love,Soft Cell,3HnqpxXpizCRADKCU78cLk,0
4,Ode To Billie Joe,Bobbie Gentry,3ZsLNrysR0UFbjcaIEcDTj,10
...,...,...,...,...
11519,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Charles Koechlin,6xKYMiIeGvGhsWYqF0hNcT,49
11520,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Gabriel Fauré,6xKYMiIeGvGhsWYqF0hNcT,49
11521,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Sarah Wegener,6xKYMiIeGvGhsWYqF0hNcT,49
11522,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Stuttgart Radio Symphony Orchestra,6xKYMiIeGvGhsWYqF0hNcT,49


In [13]:
df_final.isna().sum()
#we have to drop the rows with null values in song_id since we won't be able to extract song features for them

song_name      0
name           0
song_id       39
popularity     0
dtype: int64

In [14]:
df_final2=df_final.dropna()
df_final2

Unnamed: 0,song_name,name,song_id,popularity
0,A Design for Life - Remastered,Manic Street Preachers,0uJyE3M3ecGZgzltsQKs3a,52
1,Nature Boy,Nat King Cole,2WMyu5IYgxEuCd6xgFgJrl,58
2,Bohemian Rhapsody - Remastered 2011,Queen,1AhDOtG9vPSOmsWgNW0BEY,0
3,Tainted Love,Soft Cell,3HnqpxXpizCRADKCU78cLk,0
4,Ode To Billie Joe,Bobbie Gentry,3ZsLNrysR0UFbjcaIEcDTj,10
...,...,...,...,...
11519,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Charles Koechlin,6xKYMiIeGvGhsWYqF0hNcT,49
11520,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Gabriel Fauré,6xKYMiIeGvGhsWYqF0hNcT,49
11521,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Sarah Wegener,6xKYMiIeGvGhsWYqF0hNcT,49
11522,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Stuttgart Radio Symphony Orchestra,6xKYMiIeGvGhsWYqF0hNcT,49


* Getting audio features for each song_id

In [15]:
chunks = [(i, i+100) for i in range(0, len(df_final2), 100)]
chunks  #produces a list of tuples
audio_features_list = []
for chunk in chunks:
    id_list100 = df_final2['song_id'][chunk[0]:chunk[1]]   #chunk[0]= first number of the tuple, chunk[1]= second number
    audio_features_list = audio_features_list + sp.audio_features(id_list100)
    sleep(randint(1,3000)/1000)

In [16]:
audio_features_df = json_normalize(audio_features_list)
audio_features_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.184,0.8410,0.0,-4.723,1.0,0.0468,0.000811,0.000045,0.0422,0.349,88.224,audio_features,0uJyE3M3ecGZgzltsQKs3a,spotify:track:0uJyE3M3ecGZgzltsQKs3a,https://api.spotify.com/v1/tracks/0uJyE3M3ecGZ...,https://api.spotify.com/v1/audio-analysis/0uJy...,257640.0,4.0
1,0.275,0.0895,2.0,-19.755,0.0,0.0374,0.971000,0.022100,0.1080,0.111,104.144,audio_features,2WMyu5IYgxEuCd6xgFgJrl,spotify:track:2WMyu5IYgxEuCd6xgFgJrl,https://api.spotify.com/v1/tracks/2WMyu5IYgxEu...,https://api.spotify.com/v1/audio-analysis/2WMy...,173533.0,4.0
2,0.414,0.4040,0.0,-9.928,0.0,0.0499,0.271000,0.000000,0.3000,0.224,71.105,audio_features,1AhDOtG9vPSOmsWgNW0BEY,spotify:track:1AhDOtG9vPSOmsWgNW0BEY,https://api.spotify.com/v1/tracks/1AhDOtG9vPSO...,https://api.spotify.com/v1/audio-analysis/1AhD...,354320.0,4.0
3,0.518,0.5100,0.0,-8.057,0.0,0.0352,0.476000,0.000000,0.2920,0.651,144.543,audio_features,3HnqpxXpizCRADKCU78cLk,spotify:track:3HnqpxXpizCRADKCU78cLk,https://api.spotify.com/v1/tracks/3HnqpxXpizCR...,https://api.spotify.com/v1/audio-analysis/3Hnq...,153880.0,4.0
4,0.595,0.1820,7.0,-11.078,1.0,0.0517,0.786000,0.000000,0.0847,0.642,117.352,audio_features,3ZsLNrysR0UFbjcaIEcDTj,spotify:track:3ZsLNrysR0UFbjcaIEcDTj,https://api.spotify.com/v1/tracks/3ZsLNrysR0UF...,https://api.spotify.com/v1/audio-analysis/3ZsL...,254600.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11480,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0
11481,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0
11482,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0
11483,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0


In [17]:
audio_features_df.drop_duplicates(inplace=True) # duplicates because some songs have more artists
audio_features_df.shape

(9958, 18)

* Concatenating the songs with their audio features and exporting the df in a csv file

In [18]:
df_w_audio_ft = pd.merge(left=df_final2, right=audio_features_df, how='inner', left_on='song_id',right_on='id')
df_w_audio_ft

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,A Design for Life - Remastered,Manic Street Preachers,0uJyE3M3ecGZgzltsQKs3a,52,0.184,0.8410,0.0,-4.723,1.0,0.0468,0.000811,0.000045,0.0422,0.349,88.224,audio_features,0uJyE3M3ecGZgzltsQKs3a,spotify:track:0uJyE3M3ecGZgzltsQKs3a,https://api.spotify.com/v1/tracks/0uJyE3M3ecGZ...,https://api.spotify.com/v1/audio-analysis/0uJy...,257640.0,4.0
1,Nature Boy,Nat King Cole,2WMyu5IYgxEuCd6xgFgJrl,58,0.275,0.0895,2.0,-19.755,0.0,0.0374,0.971000,0.022100,0.1080,0.111,104.144,audio_features,2WMyu5IYgxEuCd6xgFgJrl,spotify:track:2WMyu5IYgxEuCd6xgFgJrl,https://api.spotify.com/v1/tracks/2WMyu5IYgxEu...,https://api.spotify.com/v1/audio-analysis/2WMy...,173533.0,4.0
2,Bohemian Rhapsody - Remastered 2011,Queen,1AhDOtG9vPSOmsWgNW0BEY,0,0.414,0.4040,0.0,-9.928,0.0,0.0499,0.271000,0.000000,0.3000,0.224,71.105,audio_features,1AhDOtG9vPSOmsWgNW0BEY,spotify:track:1AhDOtG9vPSOmsWgNW0BEY,https://api.spotify.com/v1/tracks/1AhDOtG9vPSO...,https://api.spotify.com/v1/audio-analysis/1AhD...,354320.0,4.0
3,Tainted Love,Soft Cell,3HnqpxXpizCRADKCU78cLk,0,0.518,0.5100,0.0,-8.057,0.0,0.0352,0.476000,0.000000,0.2920,0.651,144.543,audio_features,3HnqpxXpizCRADKCU78cLk,spotify:track:3HnqpxXpizCRADKCU78cLk,https://api.spotify.com/v1/tracks/3HnqpxXpizCR...,https://api.spotify.com/v1/audio-analysis/3Hnq...,153880.0,4.0
4,Ode To Billie Joe,Bobbie Gentry,3ZsLNrysR0UFbjcaIEcDTj,10,0.595,0.1820,7.0,-11.078,1.0,0.0517,0.786000,0.000000,0.0847,0.642,117.352,audio_features,3ZsLNrysR0UFbjcaIEcDTj,spotify:track:3ZsLNrysR0UFbjcaIEcDTj,https://api.spotify.com/v1/tracks/3ZsLNrysR0UF...,https://api.spotify.com/v1/audio-analysis/3ZsL...,254600.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11477,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Charles Koechlin,6xKYMiIeGvGhsWYqF0hNcT,49,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0
11478,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Gabriel Fauré,6xKYMiIeGvGhsWYqF0hNcT,49,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0
11479,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Sarah Wegener,6xKYMiIeGvGhsWYqF0hNcT,49,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0
11480,"Pelleas et Melisande, Op. 80 (arr. C. Koechlin...",Stuttgart Radio Symphony Orchestra,6xKYMiIeGvGhsWYqF0hNcT,49,0.000,0.0170,6.0,-33.687,0.0,0.0000,0.972000,0.854000,0.0752,0.000,0.000,audio_features,6xKYMiIeGvGhsWYqF0hNcT,spotify:track:6xKYMiIeGvGhsWYqF0hNcT,https://api.spotify.com/v1/tracks/6xKYMiIeGvGh...,https://api.spotify.com/v1/audio-analysis/6xKY...,63800.0,0.0


In [19]:
#df_w_audio_ft.to_csv('top_10000_songs.csv', index=False)