In [32]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import csv

## Authentication

WARNING: Spotify's developer credentials are strictly personal, so the file containing them is not made available. The code will still work as long as a personal "credentials.csv" file is present in the same directory, holding the client ID and the client secret on one line separated by a semicolon (`<client_id>;<client_secret>`).

In [33]:
# Read the Spotify API credentials from a semicolon-delimited file whose rows
# hold the client id in the first field and the client secret in the second.
cid = secret = None
with open('credentials.csv', mode='r', newline='') as file:
    for credentials in csv.reader(file, delimiter=';'):
        # Skip blank or incomplete rows (e.g. a trailing empty line) instead of
        # failing with an IndexError.
        if len(credentials) < 2:
            continue
        # As before, when several rows are present the last one wins.
        # Surrounding whitespace is dropped so it cannot end up in the request.
        cid, secret = credentials[0].strip(), credentials[1].strip()

# Fail here with a clear message rather than later with a NameError or an
# opaque authentication error.
if not cid or not secret:
    raise ValueError(
        "credentials.csv must contain a row of the form '<client_id>;<client_secret>'"
    )

Credentials are managed through Spotipy's built-in functions

In [34]:
# Wrap the id/secret pair in Spotipy's client-credentials manager, then hand
# that manager to the API client used by the rest of the notebook.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Playlist extraction

Spotify is organized in playlists, meaning that even data extraction is playlist-based. \
Thus the playlist from which the tracks will be extracted needs to be specified using a Spotify link. \
Playlists are furthermore divided into pages of 100 tracks, and a single request returns only the first page. \
The following function works around this by following every page until none is left:

In [35]:
def get_playlist_tracks(playlist_link):
    """Return every item of a Spotify playlist, following all result pages.

    Parameters
    ----------
    playlist_link : str
        Playlist share link such as
        ``https://open.spotify.com/playlist/<id>?si=<token>``. The query
        string and a trailing slash, if present, are ignored. A value that
        contains no ``/`` is passed to the client unchanged.

    Returns
    -------
    list
        The ``items`` entries of every page, in the order the pages were
        returned by the module-level ``sp`` client.
    """
    # Drop the query string first, then any trailing slash, so the last path
    # segment is the playlist id even for links ending in "/" or carrying a
    # "/" inside the query string.
    link_path = playlist_link.strip().split("?", 1)[0].rstrip("/")
    playlist_URI = link_path.split("/")[-1]

    results = sp.playlist_tracks(playlist_URI)
    # Copy the first page's items so the page payload itself is not extended.
    tracks = list(results['items'])
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [36]:
# Share link of the playlist whose tracks are extracted below.
playlist_link = "https://open.spotify.com/playlist/7ibyZn0ag448uMSkven931?si=0db78178f2b24e36"

## Data extraction

Below, the metadata and audio features of every track in the playlist are extracted.

In [38]:
# Column layout of the track table. The audio-feature names are also the keys
# looked up in each audio-features payload returned by the client.
AUDIO_FEATURE_COLUMNS = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                         'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                         'duration_ms', 'time_signature']
SPOTI_COLUMNS = ['track_name', 'artists', 'album', *AUDIO_FEATURE_COLUMNS, 'popularity', 'year']

# Audio features are requested in batches instead of one HTTP call per track.
# 100 is the per-request id limit documented for the audio-features endpoint.
FEATURES_BATCH_SIZE = 100

# Keep only playlist items that carry a track payload; an item whose "track"
# is missing or None would otherwise break the lookups below.
playlist_tracks = [item["track"] for item in get_playlist_tracks(playlist_link) if item.get("track")]

# Rows are collected as plain dicts and turned into a DataFrame once at the
# end; concatenating a one-row frame per track copies the table every time.
records = []
for start in range(0, len(playlist_tracks), FEATURES_BATCH_SIZE):
    batch = playlist_tracks[start:start + FEATURES_BATCH_SIZE]

    batch_features = list(sp.audio_features([track["uri"] for track in batch]) or [])
    # Pad so that every track has a (possibly empty) features slot.
    batch_features += [None] * (len(batch) - len(batch_features))

    for track, features in zip(batch, batch_features):
        # No features available for this track: keep the row and leave the
        # feature columns empty instead of failing on None["danceability"].
        features = features or {}

        record = {
            "track_name": track["name"],
            # All credited artists, comma-separated, in the order given.
            "artists": ", ".join(artist["name"] for artist in track["artists"]),
            "album": track["album"]["name"],
        }
        record.update({name: features.get(name) for name in AUDIO_FEATURE_COLUMNS})
        record["popularity"] = track["popularity"]
        # The first four characters of the album release date are kept as the year.
        record["year"] = (track["album"]["release_date"] or "")[0:4]
        records.append(record)

df_spoti = pd.DataFrame(records, columns=SPOTI_COLUMNS)

In [39]:
# Preview the first rows of the extracted track table.
df_spoti.head()

(543, 18)

## Merging datasets

Here the Spotify dataset is merged with the Rate Your Music top 500 dataset on track name and artists, adding the "ranking" and "critic score" columns.

In [40]:
# Rate Your Music ranking table (semicolon-separated file).
df_rym = pd.read_csv('./Dataset/RYM_top_tracks.csv', sep=";")

# Left join: every Spotify row is kept; rows without a match on both keys get
# missing values in the columns coming from the ranking table.
merge_keys = ['track_name', 'artists']
df = pd.merge(df_spoti, df_rym, how='left', on=merge_keys)

# Columns written to disk, in output order.
# NOTE(review): in the displayed output 'critic score' appears as digit strings
# such as 0997995992 -- possibly a decimal-comma column in the ranking file;
# confirm, and consider reading it with decimal=',' if so.
export_columns = [
    'track_name', 'artists', 'album',
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
    'popularity', 'year', 'ranking', 'critic score',
]
df.to_csv('Dataset/dataset.csv', sep=';', columns=export_columns, index=False)

df.head()

Unnamed: 0,track_name,artists,album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,popularity,year,ranking,critic score
0,Wouldn't It Be Nice,The Beach Boys,Greatest Hits,0.385,0.6570,5,-6.975,1,0.0342,0.724000,0,0.1050,0.672,124.895,153867,4,57,2012,1.0,1
1,God Only Knows,The Beach Boys,50 Big Ones: Greatest Hits,0.433,0.4690,6,-9.853,0,0.0320,0.156000,0,0.0479,0.334,115.876,175787,4,49,2012,1.0,1
2,Sinnerman,Nina Simone,Pastel Blues,0.436,0.5430,11,-13.351,0,0.0962,0.654000,0.000009,0.0937,0.663,148.282,622000,4,62,1965,2.0,0997995992
3,How to Disappear Completely,Radiohead,Kid A,0.167,0.3020,6,-11.644,0,0.0345,0.316000,0.797,0.1100,0.190,102.026,356333,4,66,2000,3.0,0995991984
4,when you sleep,my bloody valentine,loveless,0.399,0.8660,6,-9.720,1,0.0558,0.000034,0.738,0.0785,0.125,128.864,251947,4,60,1991,4.0,0993987976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
538,Army Dreamers,Kate Bush,Never for Ever,0.679,0.0742,4,-22.230,0,0.0663,0.737000,0.000096,0.0897,0.319,144.005,178640,3,51,1980,497.0,0006012024
539,Protection,"Massive Attack, Tracey Thorn",Protection,0.577,0.4210,4,-13.142,1,0.0489,0.016200,0.124,0.0722,0.737,168.009,471560,4,58,1994,498.0,0004008016
540,"Dark Was the Night, Cold Was the Ground",Blind Willie Johnson,Dark Was The Night (Mojo Workin'- Blues For Th...,0.394,0.0406,9,-23.179,1,0.0440,0.968000,0.0453,0.0976,0.200,116.117,198560,1,45,1928,499.0,0002004008
541,It's Nobody's Fault but Mine,Blind Willie Johnson,Dark Was The Night (Mojo Workin'- Blues For Th...,0.676,0.2520,3,-14.010,1,0.0390,0.980000,0.000074,0.1100,0.483,83.901,188533,4,40,1928,499.0,0002004008
