In [1]:
import requests
import pprint
import os

import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

import pandas as pd
import numpy as np

pp = pprint.PrettyPrinter()
datapath = './data/'

from math import pi, ceil

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [2]:
# Spotify API credentials are read from the environment rather than being stored
# in the notebook. SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET are the variable
# names spotipy itself looks up, so the same environment works everywhere.
# NOTE(review): any id/secret pair that was ever committed with this notebook
# should be treated as leaked and rotated in the Spotify developer dashboard.
CLIENT_ID = os.environ['SPOTIPY_CLIENT_ID']
CLIENT_SECRET = os.environ['SPOTIPY_CLIENT_SECRET']

PLAYLIST_ID = '1MCXV57pNJkJy4EQPLBzn0' # Spotify playlist id

# API Login
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager) #spotify object to access API

# Getting tracks of the playlist (a single page; no pagination is done here)
tracks = sp.user_playlist_tracks(user = 'spotify', playlist_id = PLAYLIST_ID)
# Entries whose 'track' is missing/None are skipped instead of raising TypeError
tracks_uri_list = [x['track']['uri'] for x in tracks['items'] if x.get('track')]

# Getting features of tracks: batched requests (the endpoint accepts at most
# 100 ids per call) instead of one HTTP round-trip per track.
features = []
for start in range(0, len(tracks_uri_list), 100):
    batch = sp.audio_features(tracks_uri_list[start:start + 100]) or []
    # The API yields None for tracks it has no analysis for; those cannot be
    # turned into dataframe rows, so they are dropped.
    features.extend(f for f in batch if f is not None)

# Creating feature dataframe
cols_to_drop = ['id', 'analysis_url', 'key', 'time_signature', 'track_href', 'type', 'uri', 'mode', 'duration_ms']
features_df = pd.DataFrame(features).drop(columns=cols_to_drop)

## Functions to fetch audio features for favourite (saved) songs or playlist songs

In [3]:
def get_features_from_favourites():
    '''
    Returns a dataframe of the current user's favourite (saved) songs.

    Pages through ``sp.current_user_saved_tracks`` and, for every page, asks
    ``sp.audio_features`` for the audio features of the tracks on that page.

    Returns
    -------
    pandas.DataFrame
        One row per saved track, indexed 0..n-1. Columns are the audio-feature
        fields returned by the API followed by ``added_at``, ``song_title`` and
        ``artists`` (artist names joined with ','). Tracks for which the API
        returns no features keep their row with missing feature values.

    Notes
    -----
    Uses the notebook-level client ``sp``. The saved-tracks endpoint answers
    HTTP 403 "Insufficient client scope" when ``sp`` is not authorised on behalf
    of a user; per the Spotify Web API reference the required scope is
    ``user-library-read``.
    '''
    feature_rows = []
    added_ts_list = []
    artist_list = []
    title_list = []

    offset_index = 0

    # Keep requesting pages until the API reports that there is no next page
    while True:
        songs = sp.current_user_saved_tracks(offset=offset_index)

        page_ids = []
        for song in songs['items']:
            track = song.get('track')
            # Entries without a track object or id cannot be looked up by
            # audio_features, so skip them and keep every list the same length.
            if not track or not track.get('id'):
                continue

            page_ids.append(track['id'])

            #get the time when the song was added
            added_ts_list.append(song['added_at'])

            #get the title of the song
            title_list.append(track['name'])

            #get all the artists in the song
            artist_list.append(','.join(artist['name'] for artist in track['artists']))

        if page_ids:
            # Pass the ids as a list (one request per page). A None entry means
            # "no features for this track"; an empty dict keeps the row aligned
            # with the metadata lists and becomes NaN in the dataframe.
            page_features = sp.audio_features(page_ids)
            feature_rows.extend(f if f is not None else {} for f in page_features)

        if songs['next'] is None:
            # no more saved songs
            break

        # get the next n songs
        offset_index += songs['limit']
        print('Progress: ' + str(offset_index) + ' of '+ str(songs['total']))

    # Build the frame once (DataFrame.append no longer exists in pandas >= 2.0),
    # which also gives a clean 0..n-1 index instead of one that restarts per page.
    df_result = pd.DataFrame(feature_rows)

    #add the timestamp added, title and artists of a song
    df_result['added_at'] = added_ts_list
    df_result['song_title'] = title_list
    df_result['artists'] = artist_list
    print('--- COMPLETED ---')

    return df_result

In [4]:

def get_features_from_playlist(user='', playlist_id=''):
    '''
    Takes in a user_id and a playlist_id and returns a dataframe of a user's playlist songs.

    Pages through ``sp.user_playlist_tracks`` and, for every page, asks
    ``sp.audio_features`` for the audio features of the tracks on that page.

    Parameters
    ----------
    user : str
        Id of the user the playlist is requested for.
    playlist_id : str
        Id of the playlist to read.

    Returns
    -------
    pandas.DataFrame or None
        One row per playlist track, indexed 0..n-1. Columns are the
        audio-feature fields returned by the API followed by ``added_at``,
        ``song_title``, ``artists`` (names joined with ',') and ``uploaded_by``
        (id of the user who added the track, None when the API gives none).
        Tracks without features keep their row with missing feature values.
        ``None`` is returned, after printing a hint, when a playlist id is
        given without a user.

    Notes
    -----
    Uses the notebook-level client ``sp``.
    '''
    if playlist_id != '' and user == '':
        print("Enter username for playlist")
        return

    feature_rows = []
    uploader_list = []
    added_ts_list = []
    artist_list = []
    title_list = []

    offset_index = 0

    # Keep requesting pages until the API reports that there is no next page
    while True:
        songs = sp.user_playlist_tracks(user, playlist_id=playlist_id, offset=offset_index)

        page_ids = []
        for song in songs['items']:
            track = song.get('track')
            # Entries without a track object or id cannot be looked up by
            # audio_features, so skip them and keep every list the same length.
            if not track or not track.get('id'):
                continue

            page_ids.append(track['id'])

            #get the time when the song was added
            added_ts_list.append(song['added_at'])

            #get the title of the song
            title_list.append(track['name'])

            #get all the artists in the song
            artist_list.append(','.join(artist['name'] for artist in track['artists']))

            #get user who added song in the playlist, catering for collaboration playlists
            uploader_list.append((song.get('added_by') or {}).get('id'))

        if page_ids:
            # Pass the ids as a list (one request per page). A None entry means
            # "no features for this track"; an empty dict keeps the row aligned
            # with the metadata lists and becomes NaN in the dataframe.
            page_features = sp.audio_features(page_ids)
            feature_rows.extend(f if f is not None else {} for f in page_features)

        if songs['next'] is None:
            # no more songs in playlist
            break

        # get the next n songs
        offset_index += songs['limit']
        print('Progress: ' + str(offset_index) + ' of '+ str(songs['total']))

    # Build the frame once (DataFrame.append no longer exists in pandas >= 2.0),
    # which also gives a clean 0..n-1 index instead of one that restarts per page.
    df_result = pd.DataFrame(feature_rows)

    #add the timestamp added, title and artists of a song
    df_result['added_at'] = added_ts_list
    df_result['song_title'] = title_list
    df_result['artists'] = artist_list

    #add upload user if in a playlist
    df_result['uploaded_by'] = uploader_list

    print('--- COMPLETED ---')

    return df_result


## Get audio features from playlist

In [5]:
# Ask the API for the playlists of user 'npeas' and print one "<id> <name>"
# line per playlist, so an id can be copied into the fetch call.
user_playlists = sp.user_playlists(user='npeas')

for playlist in user_playlists['items']:
    print('{} {}'.format(playlist['id'], playlist['name']))

2WmUY6AE0pEeovDIWfnNZm Tech house💥
0rbCciOum3eOudCNnb87pW JAZZ
3nSdnHcBz2089cmM0IZaH1 Pussi
3BpNp0avJOpuG6rUHpfIgK Chill💫
4ZAXmlZCnUzIdMCIedN5gE Otros
71GoBY7bCCW8F0OldRFWb6 Skpe
7LBWEAESNVSS3bWRnEyajF Esquinas de Madrid 
0j5sQy6lN40Bv7ZtRuV3Ct TOP
1uLVOAt5uUTIUAuNUFbB4D Always
7Bf0ih2yOn2b1hg1Olv7v2 Reggae


In [11]:
# Fetch audio features + metadata for playlist 7LBWEAESNVSS3bWRnEyajF
# (listed as 'Esquinas de Madrid' in the playlist listing output).
df_playlist = get_features_from_playlist(user='npeas', playlist_id='7LBWEAESNVSS3bWRnEyajF')

Progress: 100 of 306
Progress: 200 of 306
Progress: 300 of 306
--- COMPLETED ---


In [12]:
# Quick size check: (number of rows, number of columns) of the fetched frame
df_playlist.shape

(306, 22)

In [13]:
# Preview the first rows to eyeball feature values and the added metadata columns
df_playlist.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,id,uri,track_href,analysis_url,duration_ms,time_signature,added_at,song_title,artists,uploaded_by
0,0.602,0.547,7,-13.372,1,0.033,0.728,0.0,0.0641,0.645,...,45qw140Yo81dJkqv4pgvla,spotify:track:45qw140Yo81dJkqv4pgvla,https://api.spotify.com/v1/tracks/45qw140Yo81d...,https://api.spotify.com/v1/audio-analysis/45qw...,216462,4,2019-11-28T13:39:32Z,Esquinas de Madrid,84,npeas
1,0.618,0.516,4,-7.443,1,0.0321,0.729,0.0,0.113,0.367,...,6yljZV9bxz3D3yG5dX0E5g,spotify:track:6yljZV9bxz3D3yG5dX0E5g,https://api.spotify.com/v1/tracks/6yljZV9bxz3D...,https://api.spotify.com/v1/audio-analysis/6ylj...,228013,4,2019-11-28T13:40:06Z,Historias del Arte,84,npeas
2,0.618,0.701,11,-13.593,1,0.0505,0.277,0.0,0.0882,0.607,...,2xEiwSjfMz3onyI2qIO3iB,spotify:track:2xEiwSjfMz3onyI2qIO3iB,https://api.spotify.com/v1/tracks/2xEiwSjfMz3o...,https://api.spotify.com/v1/audio-analysis/2xEi...,208725,4,2019-11-28T13:40:12Z,Flor de Primavera,84,npeas
3,0.637,0.857,9,-7.287,1,0.0401,0.0142,0.00263,0.357,0.719,...,51KqByh1yYfFKRQHwhAbNQ,spotify:track:51KqByh1yYfFKRQHwhAbNQ,https://api.spotify.com/v1/tracks/51KqByh1yYfF...,https://api.spotify.com/v1/audio-analysis/51Kq...,183726,4,2019-11-28T13:40:26Z,Como si fueras a morir,84,npeas
4,0.563,0.663,11,-12.937,0,0.0463,0.407,0.0,0.296,0.803,...,1F9A9aTY7YG8ZA1Ch2UoIo,spotify:track:1F9A9aTY7YG8ZA1Ch2UoIo,https://api.spotify.com/v1/tracks/1F9A9aTY7YG8...,https://api.spotify.com/v1/audio-analysis/1F9A...,195480,4,2019-11-28T13:40:36Z,Tribunal,84,npeas


In [14]:
# Persist the fetched frame as HDF5 under `datapath`; mode='w' overwrites an existing file.
# NOTE(review): the file is named 'chill_playlist', but df_playlist was fetched with the id
# that the playlist listing shows as 'Esquinas de Madrid' (the 'Chill' playlist has a
# different id) — confirm which playlist / file name is intended.
df_playlist.to_hdf(datapath + 'chill_playlist.h5',key='df', mode='w')

## Get audio features from saved tracks

In [10]:
# NOTE(review): as recorded in this cell's output, the call fails with
# HTTP 403 "Insufficient client scope" on /v1/me/tracks. `sp` is created from
# SpotifyClientCredentials, which presumably carries no user authorisation;
# TODO: authenticate `sp` on behalf of a user (Spotify documents the
# `user-library-read` scope for this endpoint) before running this cell.
df_saved_tracks = get_features_from_favourites()

HTTP Error for GET to https://api.spotify.com/v1/me/tracks returned 403 due to Insufficient client scope


SpotifyException: http status: 403, code:-1 - https://api.spotify.com/v1/me/tracks?limit=20&offset=0:
 Insufficient client scope, reason: None

## Get audio features from discover weekly

In [None]:
# Playlist id used for the "discover weekly" fetch
discover_weekly = '37i9dQZEVXcHbb4FA2VaIY'

# NOTE: rebinds the name `playlist`, which the playlist-listing loop also uses
playlist = discover_weekly

# NOTE: rebinds `df_playlist`; after this cell it no longer holds the frame
# fetched (and saved) earlier in the notebook.
df_playlist = get_features_from_playlist(user='npeas', playlist_id=playlist)

In [None]:
# Quick size check: (number of rows, number of columns) of the discover-weekly frame
df_playlist.shape

In [None]:
# Preview the first rows of the discover-weekly frame
df_playlist.head()

In [None]:
import datetime

# Today's date as zero-padded day + month followed by the year (DDMMYYYY);
# it becomes part of the output file name.
now = datetime.datetime.now()
ddmmyyyy = now.strftime('%d%m%Y')

# Write the current df_playlist to <datapath>discover_weekly_<DDMMYYYY>.h5,
# replacing a file of the same name if one exists (mode='w').
savepath = '{}discover_weekly_{}.h5'.format(datapath, ddmmyyyy)
df_playlist.to_hdf(savepath, key='df', mode='w')