In [14]:
import config

In [15]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials


# Build the SpotiPy client, authenticating with the client id/secret kept in config.py
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

# The "sp" variable has two useful functions:
# The first useful function is:
# .search(q='',limit=n)
# .search(q="track:"+song_name+" artist:"+artist_name,limit=5) to restrict to a song name and artist.
# Where the "q" keyword is the query you want to perform on Spotify: song_name, artist,...
# while the "limit" keyword will limit the number of returned results.
#
# The second useful function is:
# .audio_features([URL|URI|ID])
# which returns some 'features of the song', that after cleanup, we can use in order to characterize a song.


In [3]:
import pprint
import pandas as pd
import time

### Getting all the playlist IDs from the user 'Spotify'

In [4]:
# Fetch the first page of the 'spotify' user's playlists, then keep following
# the "next" link until there are no more pages.
results = sp.user_playlists('spotify')
playlists = results['items']
while True:
    if not results['next']:
        break
    results = sp.next(results)
    playlists += results['items']

In [5]:
# Keep only the id of every playlist collected above.
playlist_id_list = [playlist['id'] for playlist in playlists]

### Splitting it into chunks, because of Spotify's API limits

In [6]:
def chunks(lst, n):
    """
    Yield successive slices of ``lst`` holding at most ``n`` items each.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    An empty ``lst`` yields nothing.

    Raises ValueError (on first iteration, since this is a generator) when
    ``n`` is not a positive number; a negative ``n`` used to produce no
    chunks at all without any error.
    """
    if n <= 0:
        raise ValueError(f"chunk size must be positive, got {n!r}")
    for start in range(0, len(lst), n):
        yield lst[start:start + n]
# Materialise the playlist ids as a list of groups of at most 50 ids each.
playlist_id_chunks = list(chunks(playlist_id_list, 50))

In [11]:
# Load the existing songs CSV.
# NOTE(review): presumably this is the frame meant to be passed to
# playlists_spotify, which reads its 'id' column to skip known tracks — confirm.
df = pd.read_csv('../data/spotify_songs_v1.csv')

In [16]:
def playlists_spotify(df, playlist_id_list, batch_size=100, pause=0.5):
    """
    Collect the audio features of every track on the given playlists.

    For each playlist id the function pages through the playlist's tracks,
    skips tracks whose id is already in ``df['id']`` (or was already seen
    earlier in this call), requests the audio features of the remaining
    tracks and returns ``df`` with those rows appended.

    Parameters:
        df: DataFrame of already collected tracks. Its 'id' column, when
            present, is used to avoid downloading a track twice. The frame
            itself is not modified.
        playlist_id_list: iterable of playlist ids to scan.
        batch_size: number of track ids sent per audio-features request
            (the endpoint accepts at most 100 ids per call).
        pause: seconds to sleep before each audio-features request.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the rows of ``df``
        followed by one row per newly fetched track.
    """
    # Set membership is O(1); an empty/column-less frame simply knows no ids.
    seen_ids = set(df['id']) if 'id' in df.columns else set()
    new_rows = []

    for id_ in playlist_id_list:
        results = sp.user_playlist_tracks('spotify', id_)
        tracks = list(results['items'])
        while results['next']:
            results = sp.next(results)
            tracks.extend(results['items'])

        # Playlist items can carry no track object (removed/unavailable
        # tracks) or a track without an id (e.g. local files): skip those.
        new_ids = []
        for item in tracks:
            track = item.get('track')
            track_id = track.get('id') if track else None
            if track_id is None or track_id in seen_ids:
                continue
            seen_ids.add(track_id)
            new_ids.append(track_id)

        # Ask for the features in batches instead of one request per track.
        for start in range(0, len(new_ids), batch_size):
            time.sleep(pause)
            features = sp.audio_features(new_ids[start:start + batch_size]) or []
            # The API answers None for tracks that have no audio features.
            new_rows.extend(row for row in features if row is not None)

    if not new_rows:
        return df.reset_index(drop=True)

    # Concatenate once at the end rather than growing the frame per playlist.
    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

In [14]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df1 = playlists_spotify(df, playlist_id_chunks[0])

In [28]:
# Write the first chunk's result to disk, leaving out the index column.
df1.to_csv('../data/tracks2.csv', index=False)

In [29]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df2 = playlists_spotify(df, playlist_id_chunks[1])

In [1]:
#df3 = playlists_spotify(df, playlist_id_chunks[2])

In [37]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df4 = playlists_spotify(df, playlist_id_chunks[3])

In [39]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df5 = playlists_spotify(df, playlist_id_chunks[4])

In [None]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df6 = playlists_spotify(df, playlist_id_chunks[5])

In [66]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df7 = playlists_spotify(df, playlist_id_chunks[6])

In [None]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df8 = playlists_spotify(df, playlist_id_chunks[7])

In [None]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df9 = playlists_spotify(df, playlist_id_chunks[8])

In [None]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df10 = playlists_spotify(df, playlist_id_chunks[9])

In [None]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df11 = playlists_spotify(df, playlist_id_chunks[10])

In [None]:
# playlists_spotify takes the existing songs frame first, then the playlist ids.
df12 = playlists_spotify(df, playlist_id_chunks[11])

In [None]:
# Merge the per-chunk results into a single frame.
# - df3 is left out: the only cell that would create it is commented out, so
#   naming it here raised NameError.
# - playlists_spotify returns its input frame plus the new rows, and a track
#   can sit on playlists from different chunks, so the same id can occur in
#   several dfN; keep only the first row for each track id.
df = pd.concat(
    [df1, df2, df4, df5, df6, df7, df8, df9, df10, df11, df12],
    ignore_index=True,
).drop_duplicates(subset='id', ignore_index=True)

In [None]:
# Save the combined tracks next to the other data files; the rest of the
# notebook reads from and writes to '../data/', not a local 'data/' folder.
df.to_csv('../data/spotify_tracks.csv', index=False)