In [1]:
# spotipy modules
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# other libraries
from decouple import config 
import time

In [3]:
# visualisation libraries
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# environment variables
# The Spotify API credentials are looked up through decouple's `config`
# (keys SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET) instead of being
# hard-coded in the notebook.
client_id = config('SPOTIPY_CLIENT_ID')
client_secret = config('SPOTIPY_CLIENT_SECRET')
# Redirect URI handed to the user-token prompt.
# NOTE(review): presumably this has to match the redirect URI registered for
# the Spotify app - confirm.
redirect_uri = 'http://google.com/'

In [5]:
# authorization
# Client-credentials manager built from the app id/secret.
# NOTE(review): this object is not referenced by any other cell visible in
# this notebook; the `sp` client is created from a user token instead.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

In [6]:
# Spotify user the token is requested for and whose playlists are listed.
username = 'sidsaxena'

In [7]:
# Space-separated list of OAuth scopes requested when prompting for the user token.
scope = 'playlist-read-private user-library-read user-top-read user-read-recently-played user-follow-read user-read-currently-playing'

In [74]:
# Request a user token for the configured scopes and build the `sp` client
# that every helper function in this notebook uses.
try:
    token = util.prompt_for_user_token(
        username=username,
        scope=scope,
        client_id=client_id,
        client_secret=client_secret,
        redirect_uri=redirect_uri,
    )
    sp = spotipy.Spotify(auth=token)
except Exception as err:
    # Catch `Exception` instead of using a bare `except:` so that kernel
    # interrupts (KeyboardInterrupt / SystemExit) still propagate, and report
    # the underlying cause instead of hiding it.
    print('Token not accessible for user: ', username)
    print('Reason: ', repr(err))


In [9]:
def getUserPlaylists(user):
    """Fetch every playlist listed for `user` and summarise them.

    Pages through `sp.user_playlists` in steps of 50 until the response
    reports no further page, then writes the summary to
    '<user>-playlists.csv'.

    Returns a tuple of:
      * a DataFrame with the columns `name`, `id` and `creator` (owner id);
      * a list of (creator, playlist id) tuples in the same order.
    """
    fetched = []
    page_start = 0

    # Keep requesting pages until the response stops advertising a next one.
    while True:
        page = sp.user_playlists(user, offset=page_start)
        fetched += page['items']
        if page['next'] is None:
            break
        page_start += 50

    names = [entry['name'] for entry in fetched]
    ids = [entry['id'] for entry in fetched]
    owners = [entry['owner']['id'] for entry in fetched]

    summary = pd.DataFrame(dict(name=names, id=ids, creator=owners))
    owner_id_pairs = list(zip(owners, ids))

    summary.to_csv('{}-playlists.csv'.format(user))
    return summary, owner_id_pairs

In [10]:
def getTrackIds(user, playlist_id):
    """Return the ids of all tracks in a playlist.

    The playlist is read page by page (100 entries per request) through
    `sp.user_playlist_tracks`, the same call `analysePlaylist` uses, so
    playlists longer than a single page are no longer truncated.

    Args:
        user: id of the playlist's owner.
        playlist_id: id of the playlist to read.

    Returns:
        A list with the `id` of every track entry, in playlist order.
    """
    page_size = 100
    ids = []
    offset = 0

    while True:
        page = sp.user_playlist_tracks(user, playlist_id, limit=page_size, offset=offset)
        for item in page['items']:
            track = item['track']
            # An entry can come back without a track object (presumably an
            # unavailable/removed track - TODO confirm); skip it rather than
            # crash on None['id'].
            if track is None:
                continue
            ids.append(track['id'])

        if page['next'] is None:
            break
        offset += page_size

    return ids

# ids = getTrackIds(my_user, vfar_id)

In [65]:
def analysePlaylist(creator, playlist_id):
    """Build a DataFrame of metadata and audio features for a playlist.

    All entries of the playlist are collected page by page, their audio
    features are requested in batches of up to 100 ids (instead of one
    request plus a 0.5 s pause per track), and the result is written to
    '<creator>-<playlist_id>.csv'.

    Args:
        creator: id of the playlist's owner.
        playlist_id: id of the playlist to analyse.

    Returns:
        A DataFrame with one row per track and the columns listed in
        `playlist_features_list`. Entries without a track object are skipped;
        tracks for which no audio features are returned keep their metadata
        and get empty feature values.
    """
    # Output columns: four metadata fields followed by the audio features.
    # NOTE(review): 'acousticness' is not in this list although
    # getTrackFeatures reports it - confirm whether that is intentional.
    playlist_features_list = ["artist","album","track_name",  "track_id","danceability","energy","key","loudness","mode", "speechiness","instrumentalness","liveness","valence","tempo", "duration_ms","time_signature"]
    audio_feature_names = playlist_features_list[4:]
    batch_size = 100  # ids sent per audio-features request

    # 1. Collect every playlist entry, following the pagination.
    tracks = []
    offset = 0
    while True:
        results = sp.user_playlist_tracks(creator, playlist_id, offset=offset)
        tracks += results['items']

        if results['next'] is not None:
            offset += 100
        else:
            break

    # 2. One record per track, metadata first.
    records = []
    for item in tracks:
        track = item["track"]
        if track is None:
            # No track object to read metadata from; nothing to analyse.
            continue
        records.append({
            "artist": track["album"]["artists"][0]["name"],
            "album": track["album"]["name"],
            "track_name": track["name"],
            "track_id": track["id"],
        })

    # 3. Fetch audio features in batches and index them by track id, so that
    #    duplicates and missing results cannot shift rows against each other.
    track_ids = [record["track_id"] for record in records if record["track_id"] is not None]
    features_by_id = {}
    for start in range(0, len(track_ids), batch_size):
        batch = track_ids[start:start + batch_size]
        batch_features = sp.audio_features(batch) or []
        for track_id, features in zip(batch, batch_features):
            if features is not None:
                features_by_id[track_id] = features

    for record in records:
        features = features_by_id.get(record["track_id"], {})
        for feature in audio_feature_names:
            record[feature] = features.get(feature)

    # 4. Build the frame once instead of concatenating row by row.
    playlist_df = pd.DataFrame(records, columns = playlist_features_list)

    playlist_df.to_csv('{}-{}.csv'.format(creator, playlist_id))
    return playlist_df

In [79]:
def analysePlaylistsList(playlist_tuple_list):
    """Analyse several playlists and stack the results into one DataFrame.

    Args:
        playlist_tuple_list: iterable of (creator, playlist id) pairs; each
            pair is passed to `analysePlaylist`.

    Returns:
        The per-playlist DataFrames concatenated with a fresh 0..n-1 index.
        An empty input yields an empty DataFrame instead of failing on an
        unassigned variable. The result is also written to
        'multiple playlists.csv'.
    """
    # Index with [0]/[1] (not tuple unpacking) so entries carrying extra
    # fields keep working as before.
    frames = [analysePlaylist(entry[0], entry[1]) for entry in playlist_tuple_list]
    # playlist_df['playlist'] = PLAYLIST NAME

    if frames:
        # Concatenate once rather than pairwise inside the loop.
        playlist_tuple_df = pd.concat(frames, ignore_index=True)
    else:
        playlist_tuple_df = pd.DataFrame()

    playlist_tuple_df.to_csv('multiple playlists.csv')
    return playlist_tuple_df

In [13]:
def _fetchAllItems(fetch_page, page_size):
    """Gather the 'items' of every page of a paginated Spotify call.

    `fetch_page(offset)` must return one page (a dict with 'items' and
    'next'); pages are requested until 'next' is empty.
    """
    items = []
    offset = 0
    while True:
        page = fetch_page(offset)
        items += page['items']
        if page.get('next') is None:
            break
        offset += page_size
    return items


def _releaseTrackRows(releases):
    """Return one (track name, release name, release date, track id) row per track of each release."""
    page_size = 50
    rows = []
    for release in releases:
        release_id = release['id']
        tracks = _fetchAllItems(
            lambda offset: sp.album_tracks(release_id, limit=page_size, offset=offset),
            page_size,
        )
        for track in tracks:
            rows.append((track['name'], release['name'], release['release_date'], track['id']))
    return rows


def getDiscography(name):
    """Look an artist up by name and return their discography as a DataFrame.

    The first artist returned by the search is used. All of that artist's
    albums and singles are listed (following pagination), and every track of
    every release becomes one row. The table is also written to
    '<artist name>-discog.csv'.

    Args:
        name: search string for the artist.

    Returns:
        A tuple `(df, artist_genres)` where `df` has the columns `track`,
        `album`, `release_date` and `id` (albums first, then singles), and
        `artist_genres` is the artist's 'genres' entry from the search result.

    Raises:
        IndexError: if the search returns no artist.
    """
    page_size = 50

    results = sp.search(q=name, type='artist')
    matches = results['artists']['items']
    if not matches:
        raise IndexError('No artist found for search term: {!r}'.format(name))
    artist = matches[0]

    artist_id = artist['id']
    artist_genres = artist['genres']

    album_items = _fetchAllItems(
        lambda offset: sp.artist_albums(artist_id, album_type='album', limit=page_size, offset=offset),
        page_size,
    )
    single_items = _fetchAllItems(
        lambda offset: sp.artist_albums(artist_id, album_type='single', limit=page_size, offset=offset),
        page_size,
    )

    # The release name comes straight from the listing, which avoids one extra
    # sp.album() request per release. For singles the 'album' column holds
    # the single's own name rather than repeating the track title.
    rows = _releaseTrackRows(album_items) + _releaseTrackRows(single_items)

    track_album_dict = {
        'track': [row[0] for row in rows],
        'album': [row[1] for row in rows],
        'release_date': [row[2] for row in rows],
        'id': [row[3] for row in rows],
    }
    df = pd.DataFrame(track_album_dict)
    df.to_csv('{}-discog.csv'.format(artist['name']))
    return df, artist_genres

In [14]:
def getTrackFeatures(id):
    """Return metadata and audio features of one track as a flat list.

    `id` is passed to both `sp.track` and `sp.audio_features`. The returned
    list holds, in order: name, album name, first album artist, release date,
    duration in ms, popularity, then danceability, energy, key, loudness,
    mode, speechiness, acousticness, instrumentalness, liveness, valence,
    tempo and time signature.
    """
    track_info = sp.track(id)
    feature_sets = sp.audio_features(id)

    # Metadata part of the row.
    row = [track_info['name']]
    release = track_info['album']
    row += [release['name'], release['artists'][0]['name'], release['release_date']]
    row += [track_info['duration_ms'], track_info['popularity']]

    # Audio-feature part of the row, read from the first returned entry.
    analysis = feature_sets[0]
    feature_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )
    for feature_key in feature_keys:
        row.append(analysis[feature_key])

    return row

In [15]:
def getTracklistFeatures(tracklist):
    """Collect the features of several tracks into one DataFrame.

    Every entry of `tracklist` (a sequence of track ids) is passed to
    `getTrackFeatures`; the resulting rows become a DataFrame that is also
    saved as 'tracklist-features.csv'.
    """
    column_names = [
        'name', 'album', 'artist', 'release_date', 'length', 'popularity',
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speech',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    ]

    # One feature row per position in the track list.
    rows = [getTrackFeatures(tracklist[position]) for position in range(len(tracklist))]

    df = pd.DataFrame(rows, columns=column_names)
    df.to_csv("tracklist-features.csv", sep=',')
    return df

In [16]:
# List the configured user's playlists: a summary DataFrame plus the
# (creator, playlist id) pairs used to pick playlists for analysis.
my_playlists_df, my_playlists_list = getUserPlaylists(username)

In [17]:
# Ask interactively for a playlist owner and playlist id.
# NOTE(review): in the visible cells these values are only used by a
# commented-out analysePlaylist call, and the prompts block a "Run All".
name = input('Enter Username: ')
playlistid = input('Enter Playlist ID: ')

In [18]:
# playlist_df = analysePlaylist(name, playlistid)

In [82]:
# Start from an empty selection of (creator, playlist id) pairs.
three_playlists = []

In [83]:
# Select the playlists at positions 9, 10 and 11 of the user's playlist list.
# The selection is rebuilt from scratch so that re-running this cell does not
# append the same playlists a second time.
three_playlists = [my_playlists_list[position] for position in (9, 10, 11)]

In [84]:
# Show the selected (creator, playlist id) pairs.
three_playlists

[('sidsaxena', '5U5vVOuIEppEbt3GzwDAz9'),
 ('sidsaxena', '6Tz22UKQCnp4rqu6dNzVm0'),
 ('sidsaxena', '2p9y6Nqb6HV76z1xPUmFrN')]

In [85]:
# Analyse the selected playlists and combine their tracks into one DataFrame.
three_df = analysePlaylistsList(three_playlists)

In [86]:
# Display the combined track/feature table.
three_df

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,half•alive,still feel.,still feel.,4EcYqXGZ1MOl7gBLcRO9Ya,0.779,0.525,10,-5.756,0,0.0487,0,0.0675,0.6980,120.018,247438,4
1,Better Oblivion Community Center,Better Oblivion Community Center,Service Road,0AODUbeAlQFw4jsyoWtB6U,0.540,0.548,0,-10.303,1,0.0405,3.84e-05,0.1350,0.5250,83.876,224333,4
2,Beeches,Famous Friends,Blackbird,0JkSXJb0s0UG8sazEhUSxF,0.512,0.418,8,-12.131,1,0.0271,0.0234,0.1120,0.5920,85.031,229059,4
3,The Paper Kites,twelvefour,Bleed Confusion,5cEmrGyQcCjzC994LocIjs,0.560,0.406,2,-11.134,1,0.0310,0.612,0.1080,0.0644,74.994,261027,4
4,Elliott Smith,Either/Or: Expanded Edition,Pictures of Me - Live,2wMFRRLOnHwPJIszMVkUhi,0.471,0.299,3,-10.007,0,0.0635,0,0.1270,0.2850,139.664,238211,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,You'll Never Get to Heaven,Images,Beyond the Clouds,6vMaCzTnismT6N45MDCMfa,0.633,0.575,7,-9.667,0,0.0298,0.924,0.1050,0.2390,117.520,248681,4
415,Sunday Moon,City Lights,City Lights,36xcAYhiCUnzBoydVmxkO3,0.602,0.303,6,-7.978,1,0.0451,0.091,0.1450,0.3270,135.990,241699,4
416,The Midnight,Kids,Youth,2iEWuzGZX9VJVCANbGw25z,0.248,0.264,2,-17.347,1,0.0650,0.069,0.3130,0.1610,76.742,189648,3
417,Matty,Déjàvu,I'll Gladly Place Myself Below You,7fpEayIv2yPcLnbiRLDMWn,0.518,0.439,5,-9.367,1,0.0295,0.891,0.1100,0.2830,93.003,312360,4
