In [56]:
import requests
import pprint
import os

import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd
import numpy as np

pp = pprint.PrettyPrinter()
datapath = '../notebooks/'

from math import pi, ceil

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [44]:
CLIENT_ID = '45844a803da249adae64a637787bfd7e'
CLIENT_SECRET = 'dc6e854038474d0ab8ed4faacc9d3080'

PLAYLIST_ID = '1MCXV57pNJkJy4EQPLBzn0' # Spotify playlist id

# API Login
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager) #spotify object to access API

# Getting tracks of the playlist
tracks = sp.user_playlist_tracks(user = 'spotify', playlist_id = PLAYLIST_ID)
tracks_uri_list = [x['track']['uri'] for x in tracks['items']]

# Getting features of tracks
features = []
for i in tracks_uri_list:
    features = features + sp.audio_features(i)

# Creating feature dataframe
cols_to_drop = ['id', 'analysis_url', 'key', 'time_signature', 'track_href', 'type', 'uri', 'mode', 'duration_ms']
features_df = pd.DataFrame(features).drop(cols_to_drop, axis=1)

In [45]:

def get_features_from_favourites():
    '''
    Returns a dataframe of the current user's favourite songs
    '''
    df_result = pd.DataFrame()
    track_list = ''
    added_ts_list = []
    artist_list = []
    title_list = []

    more_songs = True #As long as there is tracks not fetched from API, continue looping
    offset_index = 0

    while more_songs:
        songs = sp.current_user_saved_tracks(offset=offset_index)

        for song in songs['items']:
            #join track ids to a single string as an input parameter for audio_features function
            track_list += song['track']['id'] +','

            #get the time when the song was added
            added_ts_list.append(song['added_at'])

            #get the title of the song
            title_list.append(song['track']['name'])

            #get all the artists in the song
            artists = song['track']['artists']
            artists_name = ''
            for artist in artists:
                artists_name += artist['name']  + ','
            artist_list.append(artists_name[:-1])

        #get the track features and append into a dataframe
        track_features = sp.audio_features(track_list[:-1])
        df_temp = pd.DataFrame(track_features)
        df_result = df_result.append(df_temp)
        track_list = ''

        if songs['next'] == None:
            # no more songs in playlist
            more_songs = False
        else:
            # get the next n songs
            offset_index += songs['limit']
            print('Progress: ' + str(offset_index) + ' of '+ str(songs['total']))

    #add the timestamp added, title and artists of a song
    df_result['added_at'], df_result['song_title'], df_result['artists'] = added_ts_list, title_list, artist_list
    print('--- COMPLETED ---')
    
    return df_result

In [46]:

def get_features_from_playlist(user='', playlist_id=''):
    '''
    Takes in a user_id and a playlist_id and returns a dataframe of a user's playlist songs
    '''
    df_result = pd.DataFrame()
    track_list = ''
    uploader_list = []
    added_ts_list = []
    artist_list = []
    title_list = []

    more_songs = True #As long as there is tracks not fetched from API, continue looping
    offset_index = 0
    
    if playlist_id != '' and user == '':
        print("Enter username for playlist")
        return

    while more_songs:
        songs = sp.user_playlist_tracks(user, playlist_id=playlist_id, offset=offset_index)

        for song in songs['items']:
            #join track ids to a single string as an input parameter for audio_features function
            track_list += song['track']['id'] +','

            #get the time when the song was added
            added_ts_list.append(song['added_at'])

            #get the title of the song
            title_list.append(song['track']['name'])

            #get all the artists in the song
            artists = song['track']['artists']
            artists_name = ''
            for artist in artists:
                artists_name += artist['name']  + ','
            artist_list.append(artists_name[:-1])
            
            #get user who added song in the playlist, catering for collaboration playlists
            uploader_list.append(song['added_by']['id'])

        #get the track features and append into a dataframe
        track_features = sp.audio_features(track_list[:-1])
        df_temp = pd.DataFrame(track_features)
        df_result = df_result.append(df_temp)
        track_list = ''

        if songs['next'] == None:
            # no more songs in playlist
            more_songs = False
        else:
            # get the next n songs
            offset_index += songs['limit']
            print('Progress: ' + str(offset_index) + ' of '+ str(songs['total']))

    #add the timestamp added, title and artists of a song
    df_result['added_at'], df_result['song_title'], df_result['artists'] = added_ts_list, title_list, artist_list
    
    #add upload user if in a playlist
    df_result['uploaded_by'] = uploader_list
        
    print('--- COMPLETED ---')
    
    return df_result

In [47]:
user_playlists = sp.user_playlists(user='npeas')

for playlist in user_playlists['items']:
    print(playlist['id'], playlist['name'])

2WmUY6AE0pEeovDIWfnNZm Tech houseðŸ’¥
0rbCciOum3eOudCNnb87pW JAZZ
3nSdnHcBz2089cmM0IZaH1 Pussi
3BpNp0avJOpuG6rUHpfIgK ChillðŸ’«
4ZAXmlZCnUzIdMCIedN5gE Otros
71GoBY7bCCW8F0OldRFWb6 Skpe
7LBWEAESNVSS3bWRnEyajF Esquinas de Madrid 
0j5sQy6lN40Bv7ZtRuV3Ct TOP
1uLVOAt5uUTIUAuNUFbB4D Always
7Bf0ih2yOn2b1hg1Olv7v2 Reggae


In [48]:
df_playlist = get_features_from_playlist(user='npeas', playlist_id='1MCXV57pNJkJy4EQPLBzn0')

--- COMPLETED ---


In [49]:
df_playlist.shape

(26, 22)

In [50]:
df_playlist.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,id,uri,track_href,analysis_url,duration_ms,time_signature,added_at,song_title,artists,uploaded_by
0,0.593,0.749,5,-5.671,1,0.0475,0.0116,0.108,0.314,0.65,...,5ruzrDWcT0vuJIOMW7gMnW,spotify:track:5ruzrDWcT0vuJIOMW7gMnW,https://api.spotify.com/v1/tracks/5ruzrDWcT0vu...,https://api.spotify.com/v1/audio-analysis/5ruz...,309053,4,2020-12-10T15:59:07Z,The Adults Are Talking,The Strokes,1195145834
1,0.575,0.72,4,-10.72,0,0.0539,0.0956,0.0587,0.18,0.326,...,6lhuBaygBqxBjdcXrM685k,spotify:track:6lhuBaygBqxBjdcXrM685k,https://api.spotify.com/v1/tracks/6lhuBaygBqxB...,https://api.spotify.com/v1/audio-analysis/6lhu...,216634,4,2020-12-10T15:59:29Z,rue,girl in red,1195145834
2,0.641,0.842,9,-7.27,1,0.0292,0.132,0.91,0.419,0.5,...,64lsIF5pw0sJY0gV5kz0RN,spotify:track:64lsIF5pw0sJY0gV5kz0RN,https://api.spotify.com/v1/tracks/64lsIF5pw0sJ...,https://api.spotify.com/v1/audio-analysis/64ls...,278769,4,2020-12-10T15:59:41Z,Can I Call You Tonight?,Dayglow,1195145834
3,0.689,0.858,4,-2.868,1,0.161,0.00383,0.0,0.129,0.643,...,7ACT6YaXbYvl7hRWEOOEHQ,spotify:track:7ACT6YaXbYvl7hRWEOOEHQ,https://api.spotify.com/v1/tracks/7ACT6YaXbYvl...,https://api.spotify.com/v1/audio-analysis/7ACT...,126465,4,2020-12-10T16:00:18Z,Double Negative (Skeleton Milkshake),Dominic Fike,1195145834
4,0.518,0.436,4,-14.134,1,0.0349,0.0157,0.000621,0.079,0.27,...,6mcxQ1Y3uQRU0IHsvdNLH1,spotify:track:6mcxQ1Y3uQRU0IHsvdNLH1,https://api.spotify.com/v1/tracks/6mcxQ1Y3uQRU...,https://api.spotify.com/v1/audio-analysis/6mcx...,236973,4,2020-12-10T16:00:45Z,Where Is My Mind?,Pixies,1195145834


In [57]:
df_playlist.to_hdf(datapath + 'chill_playlist.h5',key='df', mode='w')

OSError: ``../notebooks`` does not exist