# 6.5 GNOD - API wrappers

create a collection of songs with their audio features

In [1]:
#!pip install spotipy

#### Authentification

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

#### retrieving secrets
* creating a .gitignore file containing the name of the file secrets.txt on a line by its own,
* prevent the secrets file from being pushed to github

In [3]:
secrets_file = open("secrets.txt","r")

In [4]:
string = secrets_file.read()

In [5]:
#creating a dictionary
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        #print(line.split(':'))
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

#### authentication with secrets

In [6]:
#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

### Playlists

We will need to collect a "database" of songs. Playlists are a good way to access relatively large amounts of songs.

In [7]:
# 10 000 title playlist
playlist = sp.user_playlist_tracks("spotify", "6yPiKpy7evrwvZodByKvM9")


In [8]:
# items
playlist.keys() 

dict_keys(['href', 'items', 'limit', 'next', 'offset', 'previous', 'total'])

In [9]:
playlist["items"][0].keys()

dict_keys(['added_at', 'added_by', 'is_local', 'primary_color', 'track', 'video_thumbnail'])

In [12]:
# how many songs : playlist["total"]

In [14]:
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    results = sp.user_playlist_tracks("spotify",playlist_id)
    tracks = results['items']
    while results['next']!=None:
        results = sp.next(results) #sp.next goes to the next page
        tracks = tracks + results['items']
        sleep(randint(0,1)/1000) #  Sleep for a random time between 0 and 1 second (1000 for millisecond)
    return tracks

In [15]:
all_tracks = get_playlist_tracks("6yPiKpy7evrwvZodByKvM9")
len(all_tracks)

10000

In [17]:
from pandas import json_normalize
pd.set_option("display.max_columns", 0)

tracks2 = json_normalize(all_tracks) #create a df out of all tracks of the playlist 
#still 10 000 rows

In [19]:
artists_df2 = pd.DataFrame(columns=['href', 'id', 'name', 'type', 'uri', 'external_urls.spotify','song_id', 'song_name', 'popularity' ])
for i in tracks2.index:
    artists_for_song = json_normalize(tracks2.iloc[i]['track.artists'])
    artists_for_song['song_id']    = tracks2.iloc[i]['track.id']         # keep song_id
    artists_for_song['song_name']  = tracks2.iloc[i]['track.name']       # keep song_name
    artists_for_song['popularity'] = tracks2.iloc[i]['track.popularity'] # popularity   
    artists_df2 = pd.concat([artists_df2, artists_for_song], axis=0)

In [21]:
artists_df2.head(8) #artists with urls

Unnamed: 0,href,id,name,type,uri,external_urls.spotify,song_id,song_name,popularity
0,https://api.spotify.com/v1/artists/0IVapwlnM3d...,0IVapwlnM3dEOiMsHXsghT,Nosaj Thing,artist,spotify:artist:0IVapwlnM3dEOiMsHXsghT,https://open.spotify.com/artist/0IVapwlnM3dEOi...,33xMbeHzmWd6Od0BmLZEUs,2K,0
0,https://api.spotify.com/v1/artists/22WZ7M8sxp5...,22WZ7M8sxp5THdruNY3gXt,The Doors,artist,spotify:artist:22WZ7M8sxp5THdruNY3gXt,https://open.spotify.com/artist/22WZ7M8sxp5THd...,3UnyplmZaq547hwsfOR5yy,4 Billion Souls,26
0,https://api.spotify.com/v1/artists/4Z8W4fKeB5Y...,4Z8W4fKeB5YxbusRsdQVPb,Radiohead,artist,spotify:artist:4Z8W4fKeB5YxbusRsdQVPb,https://open.spotify.com/artist/4Z8W4fKeB5Yxbu...,1w8QCSDH4QobcQeT4uMKLm,4 Minute Warning,0
0,https://api.spotify.com/v1/artists/0UK6JkgUMa2...,0UK6JkgUMa28b4t8eCtg6P,Vitas,artist,spotify:artist:0UK6JkgUMa28b4t8eCtg6P,https://open.spotify.com/artist/0UK6JkgUMa28b4...,7J9mBHG4J2eIfDAv5BehKA,7 Element,0
0,https://api.spotify.com/v1/artists/4KWTAlx2Rvb...,4KWTAlx2RvbpseOGMEmROg,R.E.M.,artist,spotify:artist:4KWTAlx2RvbpseOGMEmROg,https://open.spotify.com/artist/4KWTAlx2Rvbpse...,1VZedwJj1gyi88WFRhfThb,#9 Dream,6
0,https://api.spotify.com/v1/artists/2jzc5TC5TVF...,2jzc5TC5TVFLXQlBNiIUzE,a-ha,artist,spotify:artist:2jzc5TC5TVFLXQlBNiIUzE,https://open.spotify.com/artist/2jzc5TC5TVFLXQ...,3DSmOF7ETJHAQEqqgoSJao,#9 Dream,12
0,https://api.spotify.com/v1/artists/7mnBLXK823v...,7mnBLXK823vNxN3UWB7Gfz,The Black Keys,artist,spotify:artist:7mnBLXK823vNxN3UWB7Gfz,https://open.spotify.com/artist/7mnBLXK823vNxN...,5HPnumlogzZtpLEaORZjz1,10 Lovers,53
0,https://api.spotify.com/v1/artists/1rAv1GhTQ2r...,1rAv1GhTQ2rmG94p9lU3rB,Julian Casablancas,artist,spotify:artist:1rAv1GhTQ2rmG94p9lU3rB,https://open.spotify.com/artist/1rAv1GhTQ2rmG9...,6i99ThziImjuFABb8rUQvy,11th Dimension,56


In [22]:
df_final2 = artists_df2[['song_name', 'name', 'song_id', 'popularity']]
df_final2.head() #final df for artists

Unnamed: 0,song_name,name,song_id,popularity
0,2K,Nosaj Thing,33xMbeHzmWd6Od0BmLZEUs,0
0,4 Billion Souls,The Doors,3UnyplmZaq547hwsfOR5yy,26
0,4 Minute Warning,Radiohead,1w8QCSDH4QobcQeT4uMKLm,0
0,7 Element,Vitas,7J9mBHG4J2eIfDAv5BehKA,0
0,#9 Dream,R.E.M.,1VZedwJj1gyi88WFRhfThb,6


In [34]:
df_final2.isnull().sum() #82 nulls to remove

song_name      0
name           0
song_id       82
popularity     0
dtype: int64

In [36]:
df_final2.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final2.dropna(inplace=True)


In [37]:
audio_features_list = []
chunks = [(i, i+100) for i in range(0, len(df_final2), 100)]
for chunk in chunks:
    id_list100 = df_final2['song_id'][chunk[0]:chunk[1]]  
    audio_features_list = audio_features_list + sp.audio_features(id_list100)
    sleep(randint(1,3000)/1000)

In [38]:
len(audio_features_list)

11771

In [39]:
audio_features_df = json_normalize(audio_features_list)
audio_features_df.drop_duplicates(inplace=True) # duplicates because some songs have more artists

In [40]:
audio_features_df.shape

(9894, 18)

In [41]:
df_w_audio_ft = pd.merge(left=df_final2,
                        right=audio_features_df,
                        how='inner',
                        left_on='song_id',
                        right_on='id')


In [42]:
df_w_audio_ft.shape
#there are duplicate songs (but artist is different) because of the left merge on df_final2

Unnamed: 0,song_name,name,song_id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,2K,Nosaj Thing,33xMbeHzmWd6Od0BmLZEUs,0,0.310,0.445,7,-13.355,0,0.0863,0.09400,0.067800,0.1130,0.122,95.360,audio_features,33xMbeHzmWd6Od0BmLZEUs,spotify:track:33xMbeHzmWd6Od0BmLZEUs,https://api.spotify.com/v1/tracks/33xMbeHzmWd6...,https://api.spotify.com/v1/audio-analysis/33xM...,152560,3
1,4 Billion Souls,The Doors,3UnyplmZaq547hwsfOR5yy,26,0.419,0.565,5,-11.565,1,0.0347,0.13700,0.337000,0.1280,0.648,151.277,audio_features,3UnyplmZaq547hwsfOR5yy,spotify:track:3UnyplmZaq547hwsfOR5yy,https://api.spotify.com/v1/tracks/3UnyplmZaq54...,https://api.spotify.com/v1/audio-analysis/3Uny...,197707,4
2,4 Minute Warning,Radiohead,1w8QCSDH4QobcQeT4uMKLm,0,0.354,0.302,9,-13.078,1,0.0326,0.59000,0.070900,0.1110,0.223,123.753,audio_features,1w8QCSDH4QobcQeT4uMKLm,spotify:track:1w8QCSDH4QobcQeT4uMKLm,https://api.spotify.com/v1/tracks/1w8QCSDH4Qob...,https://api.spotify.com/v1/audio-analysis/1w8Q...,244285,4
3,7 Element,Vitas,7J9mBHG4J2eIfDAv5BehKA,0,0.727,0.785,5,-6.707,0,0.0603,0.32500,0.126000,0.3100,0.960,129.649,audio_features,7J9mBHG4J2eIfDAv5BehKA,spotify:track:7J9mBHG4J2eIfDAv5BehKA,https://api.spotify.com/v1/tracks/7J9mBHG4J2eI...,https://api.spotify.com/v1/audio-analysis/7J9m...,249940,4
4,#9 Dream,R.E.M.,1VZedwJj1gyi88WFRhfThb,6,0.571,0.724,0,-5.967,1,0.0260,0.02310,0.003110,0.0919,0.385,116.755,audio_features,1VZedwJj1gyi88WFRhfThb,spotify:track:1VZedwJj1gyi88WFRhfThb,https://api.spotify.com/v1/tracks/1VZedwJj1gyi...,https://api.spotify.com/v1/audio-analysis/1VZe...,278320,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11766,London Calling - Remastered,The Clash,5jzma6gCzYtKB1DbEwFZKH,75,0.651,0.801,0,-7.340,1,0.0514,0.12300,0.000000,0.0825,0.776,133.769,audio_features,5jzma6gCzYtKB1DbEwFZKH,spotify:track:5jzma6gCzYtKB1DbEwFZKH,https://api.spotify.com/v1/tracks/5jzma6gCzYtK...,https://api.spotify.com/v1/audio-analysis/5jzm...,200480,4
11767,Low Rider,War,2fmMPJb5EzZCx8BcNJvVk4,0,0.811,0.647,0,-10.989,1,0.0498,0.08230,0.681000,0.0572,0.990,139.787,audio_features,2fmMPJb5EzZCx8BcNJvVk4,spotify:track:2fmMPJb5EzZCx8BcNJvVk4,https://api.spotify.com/v1/tracks/2fmMPJb5EzZC...,https://api.spotify.com/v1/audio-analysis/2fmM...,191560,4
11768,Flower,Moby,60rIdEPDrzyLiLC0icp3xz,0,0.686,0.610,7,-5.902,1,0.0262,0.19000,0.000931,0.0710,0.766,80.567,audio_features,60rIdEPDrzyLiLC0icp3xz,spotify:track:60rIdEPDrzyLiLC0icp3xz,https://api.spotify.com/v1/tracks/60rIdEPDrzyL...,https://api.spotify.com/v1/audio-analysis/60rI...,206293,4
11769,Brighter Than Gold,The Cat Empire,0sEm1ld0V8YTCPcjPVfIsc,47,0.711,0.718,6,-5.739,1,0.0380,0.00824,0.002080,0.0732,0.688,117.071,audio_features,0sEm1ld0V8YTCPcjPVfIsc,spotify:track:0sEm1ld0V8YTCPcjPVfIsc,https://api.spotify.com/v1/tracks/0sEm1ld0V8YT...,https://api.spotify.com/v1/audio-analysis/0sEm...,200293,4


In [43]:
df_w_audio_ft.to_csv('spotify_10000_audio_features.csv')