In [None]:
import sys
import pandas as pd
import numpy as np
import os
import json
from json.decoder import JSONDecodeError
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

<img src="FWBF_NTDS_Graphics.png" alt="Alt text that describes the graphic" title="FWBF_NTDS_Graphics" />

# The SpotipyScrapper

Here is the basis for scraping the data using the [Spotipy](https://github.com/plamere/spotipy) Python library, which uses the [Spotify Web API](https://developer.spotify.com/documentation/web-api/) to access data. The goal of this script was to get the data from the ['Your top Songs of 2018'](https://spotifywrapped.com) Spotify playlists of 25 volunteer users. For convenience, the playlists were previously copied to one single account to ease the login process, as time was running short. Credentials are found on the [Spotify For Developer Page](https://developer.spotify.com/dashboard/) once the linked app or project is registered.

In [None]:
# Initialization of the credentials, accessible through the Spotify For Developer page.
# Each value is read from the environment variable that Spotipy itself recognises, so real
# secrets never have to be written into (and shared with) the notebook. The original
# placeholder strings remain as fallbacks when a variable is not set.
client_id_JC = os.environ.get('SPOTIPY_CLIENT_ID', 'replace_by_your_cliend_id')
client_secret_JC = os.environ.get('SPOTIPY_CLIENT_SECRET', 'replace_by_your_client_secret')
redirect_uri_JC = os.environ.get('SPOTIPY_REDIRECT_URI', 'replace_by_your_redirect_uri')

In [None]:
# Get the authorization token for the current session. Opens a new window asking the user to log in.
if __name__ == '__main__':
    username = 'replace_by_the_desired_spotify_username'
    scope = None
    # Keyword arguments shared by the first attempt and the retry below.
    auth_kwargs = dict(client_id=client_id_JC,
                       client_secret=client_secret_JC,
                       redirect_uri=redirect_uri_JC)
    try:
        token = util.prompt_for_user_token(username, scope, **auth_kwargs)
    except (AttributeError, JSONDecodeError):
        # These errors are handled by discarding the per-user cache file and retrying once.
        # The file may not exist; an unconditional os.remove would then raise
        # FileNotFoundError, hiding the original failure and skipping the retry.
        cache_path = f".cache-{username}"
        if os.path.exists(cache_path):
            os.remove(cache_path)
        token = util.prompt_for_user_token(username, scope, **auth_kwargs)
    if token:
        # `sp` is the client used by every later cell of the notebook.
        sp = spotipy.Spotify(auth=token)
    else:
        print("Can't get token for", username)

In [None]:
# Debugging / data-exploration helper, handy before exporting anything.
def show_tracks(results):
    """Print one summary line per entry of ``results['items']``.

    Each line holds, in order: the 1-based position, the first artist's name
    (right-aligned in, and cut to, 32 characters), the track name, the album
    release date, the disc number, the popularity and the duration in ms.
    """
    line_format = " %d , %32.32s , %s , %s , %d , %d, %d"
    for position, entry in enumerate(results['items'], start=1):
        song = entry['track']
        fields = (
            position,
            song['artists'][0]['name'],
            song['name'],
            song['album']['release_date'],
            song['disc_number'],
            song['popularity'],
            song['duration_ms'],
        )
        print(line_format % fields)

#Function used to compile everything into a single csv file and save it properly.
def save_tracks(tracks,playlist_name):
    """Collect metadata, audio features and genres for a playlist's tracks and save them as CSV.

    Parameters
    ----------
    tracks : dict
        Paging object with an ``'items'`` list whose entries hold a ``'track'`` dict.
        When it carries a truthy ``'next'`` entry, the following pages are fetched
        with ``sp.next`` so that every page ends up in the export.
    playlist_name : str
        Stored in the 'Playlist Origin' column and used as the CSV file name.

    Returns
    -------
    pandas.DataFrame
        One row per exported track. The same frame is written to
        ``user_data/<playlist_name>.csv`` with ``;`` as separator.

    Notes
    -----
    Uses the module-level ``sp`` client: one ``sp.artist`` and one
    ``sp.audio_features`` request is issued per track.
    """
    # NOTE(review): 'Valence' appears twice. The duplicate is kept on purpose so the CSV
    # layout stays identical for existing consumers - confirm whether another feature was meant.
    columns = ['Playlist Origin'
               ,'Playlist order'
               ,'Artist'
               ,'Track Name'
               ,'Album Name'
               ,'Album Release Date'
               ,'Track Number'
               ,'Track Popularity'
               ,'Track Id'
               ,'Track Duration MS'
               ,'Danceability'
               ,'Energy'
               ,'Key'
               ,'Loudness'
               ,'Mode'
               ,'Speechiness'
               ,'Acousticness'
               ,'Instrumentalness'
               ,'Liveness'
               ,'Valence'
               ,'Tempo'
               ,'Valence'
               ,'Track_href'
               ,'Time_signature'
               ,'uri'
               ,'Genres']
    # Audio-feature keys, in the order of the 'Danceability' ... 'uri' columns above.
    feature_keys = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
                    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                    'valence', 'track_href', 'time_signature', 'uri']

    rows = []
    order = 0  # 1-based position in the playlist, counted across pages
    page = tracks
    while page:
        for item in page['items']:
            order += 1
            track = item['track']
            if track is None:
                # Nothing to describe for an entry without track data; skip it instead of
                # crashing. Its position is still counted so 'Playlist order' stays faithful.
                continue
            artist = sp.artist(track['artists'][0]['id'])
            features = sp.audio_features(track['uri'])
            # The lookup may yield nothing (empty result or a None entry); fall back to an
            # empty mapping so the feature columns are simply left empty for that track.
            feature_values = (features[0] if features else None) or {}
            rows.append([playlist_name
                         ,order
                         ,track['artists'][0]['name']
                         ,track['name']
                         ,track['album']['name']
                         ,track['album']['release_date']
                         # Previously filled from 'disc_number', which did not match the column name.
                         ,track['track_number']
                         ,track['popularity']
                         ,track['id']
                         ,track['duration_ms']]
                        + [feature_values.get(key) for key in feature_keys]
                        + [artist['genres']])
        # Follow the paging link, if any, so later pages are exported as well.
        page = sp.next(page) if page.get('next') else None

    # Build the frame once instead of growing it row by row. The index keeps the
    # zero-based playlist position, as the row-by-row version produced.
    saved_tracks = pd.DataFrame(rows, columns=columns, index=[row[1] - 1 for row in rows])
    # Make sure the output folder exists before writing.
    os.makedirs('user_data', exist_ok=True)
    file_name = 'user_data/'+playlist_name+'.csv'
    saved_tracks.to_csv(file_name, sep=';')
    return saved_tracks

<div class="alert alert-block alert-warning">
<font color='#B8860B'>
<b>Note</b>
</font>
<font color='black'>
<br> For more information about the audio features of the songs and their meaning, please visit the [corresponding page](https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/) or the global [Spotify Web API](https://developer.spotify.com/documentation/web-api/).
</font>
</div>

In [None]:
#Get the list of all the public playlists of the given user
playlists = sp.user_playlists(username)

# Rows are collected across ALL result pages and numbered continuously. Restarting the
# row number on each page would overwrite the rows stored from the previous pages.
playlist_rows = []

#Print them:
while playlists:
    for playlist in playlists['items']:
        # The printed number is the row label the playlist gets in playlist_dataframe.
        print("%4d %s %s %s" % (len(playlist_rows), playlist['name'], playlist['id'], playlist['uri']))
        playlist_rows.append([playlist['name'], playlist['uri'], playlist['id']])
    # Follow the paging link until there is no further page.
    if playlists['next']:
        playlists = sp.next(playlists)
    else:
        playlists = None

# Build the frame once; its default 0..n-1 index matches the numbers printed above.
playlist_dataframe = pd.DataFrame(playlist_rows, columns=['Playlist name','Playlist uri','Playlist Id'])

In [None]:
#Save tracks for a single playlist ID:
def track_savior(playlist_id):
    """Export the tracks of one playlist, provided it is owned by ``username``.

    Parameters
    ----------
    playlist_id : str
        Identifier of the playlist to export.

    Returns
    -------
    The value returned by ``save_tracks`` for the playlist, or ``None`` when the
    playlist's owner id differs from the module-level ``username`` (nothing is saved).

    Notes
    -----
    Uses the module-level ``sp`` client and ``username``. The playlist is requested
    once; the response's ``'tracks'`` entry (already read for the track count) is what
    gets handed to ``save_tracks``, so no second request for the same playlist is needed.
    """
    playlist = sp.user_playlist(username,playlist_id=playlist_id)
    if playlist['owner']['id'] != username:
        # Playlists owned by someone else are left untouched.
        return None
    print('To save: ',playlist['name'])
    print('Number of tracks', playlist['tracks']['total'])
    return save_tracks(playlist['tracks'],playlist['name'])

In [None]:
#Save tracks for a single playlist of choice among the available ones:
def single_playlist_savior():
    """Ask which playlist to export, then export it with ``track_savior``.

    The row labels and names of ``playlist_dataframe`` are printed and a row label
    is read from standard input. When ``user_data/<playlist name>.csv`` already
    exists, the user is asked whether to overwrite it; only the answers ``y`` / ``yes``
    (any case, surrounding blanks ignored) trigger a new export.

    Raises
    ------
    ValueError
        If the typed selection is not an integer.
    KeyError
        If the typed number is not a row label of ``playlist_dataframe``.
    """
    print('Which playlist do you want?')
    print(playlist_dataframe['Playlist name'])
    selected_one = int(input())
    chosen_playlist_id = playlist_dataframe.loc[selected_one,"Playlist Id"]
    chosen_playlist_name = playlist_dataframe.loc[selected_one,"Playlist name"]
    print('You chose the', chosen_playlist_name, 'playlist')
    # Look for the exact export file. A substring test against every directory entry
    # would also match other playlists whose names merely contain this one
    # ("Rock" vs "Rock 2.csv"), and listing the folder fails when it does not exist yet.
    csv_path = os.path.join('user_data', chosen_playlist_name + '.csv')
    if os.path.isfile(csv_path):
        print(chosen_playlist_name,'is already done!')
        print('Overwrite?')
        yes_no = input()
        if yes_no.strip().lower() in ('y', 'yes'):
            track_savior(chosen_playlist_id)
    else:
        track_savior(chosen_playlist_id)

In [None]:
# Interactive step: prompts for a playlist number on standard input and exports that
# playlist, asking for confirmation first when it looks already exported.
single_playlist_savior()

In [None]:
#Iterate through the list of playlists and save the ones that are not already done. To overwrite an existing export, use single_playlist_savior.
for i in range(0,len(playlist_dataframe)):
    chosen_playlist_id = playlist_dataframe.loc[i,"Playlist Id"]
    chosen_playlist_name = playlist_dataframe.loc[i,"Playlist name"]
    # Look for the exact export file. A substring test against every directory entry
    # would skip playlists whose name is contained in another exported playlist's name,
    # and listing the folder fails when 'user_data' does not exist yet.
    if os.path.isfile(os.path.join('user_data', chosen_playlist_name + '.csv')):
        print(chosen_playlist_name,'is already done!')
    else:
        print('Processing the', chosen_playlist_name, 'playlist')
        print('whose ID is :', chosen_playlist_id)
        track_savior(chosen_playlist_id)

<div class="alert alert-block alert-warning">
<font color='#B8860B'>
<b>Note</b>
</font>
<font color='black'>
<br> Now, every public playlist of the desired user is saved with the given features into a .csv file. For data processing, see the FWBF notebook. A huge thanks to the contributors of the Spotipy library, especially for the [examples](https://github.com/plamere/spotipy/tree/master/examples) section; may the music God be good to you!
</font>
</div>