In [1]:
# spotipy modules
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# other libraries
from decouple import config 
import time
from IPython.core.display import clear_output
import csv
from itertools import chain

In [3]:
# visualisation libraries
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# environment variables
# Spotify API credentials are looked up through decouple's `config` under the
# keys SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET, so they never appear in the notebook.
client_id = config('SPOTIPY_CLIENT_ID')
client_secret = config('SPOTIPY_CLIENT_SECRET')
# Redirect URI handed to util.prompt_for_user_token() inside getToken().
# NOTE(review): presumably has to match the redirect URI registered for the Spotify app - confirm.
redirect_uri = 'http://google.com/'

In [5]:
# authorization
# Builds a client-credentials manager from the app id/secret loaded above.
# NOTE(review): this object is not referenced anywhere else in the visible notebook;
# getToken() authenticates through util.prompt_for_user_token instead - confirm it is still needed.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

In [6]:
# Spotify user name; getToken() passes it to util.prompt_for_user_token and
# prints it in its failure message.
username = 'sidsaxena'

In [7]:
# Space-separated list of authorization scopes that getToken() requests.
# Kept one per line so individual scopes are easy to review or remove.
scope = ' '.join([
    'ugc-image-upload',
    'user-read-playback-state',
    'user-modify-playback-state',
    'user-read-currently-playing',
    'streaming',
    'app-remote-control',
    'user-read-email',
    'user-read-private',
    'playlist-read-collaborative',
    'playlist-modify-public',
    'playlist-read-private',
    'playlist-modify-private',
    'user-library-modify',
    'user-library-read',
    'user-top-read',
    'user-read-playback-position',
    'user-read-recently-played',
    'user-follow-read',
    'user-follow-modify',
])

In [8]:
def getToken():
    """Request a user token and return a `spotipy.Spotify` client built from it.

    Reads the module-level `username`, `scope`, `client_id`, `client_secret`
    and `redirect_uri`, and uses the fixed token cache path below.

    Returns:
        spotipy.Spotify: client created with `auth=<token>`.

    Raises:
        RuntimeError: if no token was handed back.
        Exception: any error raised while requesting the token is re-raised
            unchanged after the failure message has been printed. (The previous
            version swallowed it with a bare `except:` and then failed with an
            unrelated UnboundLocalError on `return sp`.)
    """
    try:
        token = util.prompt_for_user_token(username=username, scope=scope, client_id=client_id, client_secret= client_secret, redirect_uri=redirect_uri, cache_path='/home/sid/development/python/music-analysis/spotify/.cache-sidsaxena')
        # NOTE(review): prompt_for_user_token appears to return None when no token
        # could be obtained - confirm against the installed spotipy version.
        if not token:
            raise RuntimeError('No Spotify token was returned for user: {}'.format(username))
        return spotipy.Spotify(auth=token)
    except Exception:
        # Keep the original user-facing message, but let the real cause propagate.
        print('Token not accessible for user: ', username)
        raise

In [9]:
# Module-level Spotify client; the helper functions defined below all read this global `sp`.
sp = getToken()

In [10]:
def getUserPlaylists(user):
    """Fetch every playlist listed for `user` and summarise them.

    Pages through `sp.user_playlists`, advancing the offset by 50 per request
    until the response's `next` field is None.

    Side effect: writes the summary to '<user>-playlists.csv' in the working directory.

    Returns:
        tuple: (DataFrame with columns name / id / creator,
                list of (creator_id, playlist_id) tuples in the same order).
    """
    collected = []
    start = 0
    has_more = True
    while has_more:
        page = sp.user_playlists(user, offset=start)
        collected.extend(page['items'])
        has_more = page['next'] is not None
        start += 50

    names = [entry['name'] for entry in collected]
    playlist_ids = [entry['id'] for entry in collected]
    creators = [entry['owner']['id'] for entry in collected]

    playlist_df = pd.DataFrame({'name': names, 'id': playlist_ids, 'creator': creators})
    # (creator, playlist id) pairs: the shape analysePlaylistsList() consumes.
    creator_id_pairs = list(zip(creators, playlist_ids))

    playlist_df.to_csv('{}-playlists.csv'.format(user))
    return playlist_df, creator_id_pairs

In [11]:
def getTrackIds(user, playlist_id):
    """Return the track ids of every track in a playlist.

    The playlist is fetched with `sp.user_playlist`; its embedded `tracks`
    paging object is then followed with `sp.next` until `next` is empty, so
    playlists longer than the first page are returned in full (the previous
    version only read the first page).

    Entries whose `track` field is None are skipped instead of raising TypeError.

    Args:
        user: owner of the playlist, passed through to `sp.user_playlist`.
        playlist_id: id of the playlist.

    Returns:
        list: track ids in playlist order.
    """
    ids = []
    playlist = sp.user_playlist(user, playlist_id)
    page = playlist['tracks']
    while page:
        for item in page['items']:
            track = item['track']
            if track is None:
                # No track object to read an id from.
                continue
            ids.append(track['id'])
        page = sp.next(page) if page.get('next') else None
    return ids

# ids = getTrackIds(my_user, vfar_id)

In [12]:
def getTrackIdsFromAlbum(album_id):
    """Return the ids of all tracks on an album.

    Follows the paging links of `sp.album_tracks` with `sp.next`, so albums
    with more tracks than fit on the first page are returned in full.
    The previous version also issued an `sp.album` request and collected track
    names, neither of which affected the result; both were removed.

    Args:
        album_id: Spotify album id.

    Returns:
        list: track ids in album order.
    """
    track_id_list = []
    page = sp.album_tracks(album_id)
    while page:
        track_id_list.extend(track['id'] for track in page['items'])
        page = sp.next(page) if page.get('next') else None
    return track_id_list

In [13]:
def analysePlaylist(creator, playlist_id):
    """Build a per-track feature table for one playlist.

    Every page of `sp.user_playlist_tracks` is fetched (offset advanced by 100).
    For each track the function records the first album artist, album name,
    track name / id / popularity, that artist's genres (via `sp.artist`) and a
    fixed set of audio features (via `sp.audio_features`).

    Changes from the previous version:
    * rows are collected in a list and turned into a DataFrame once, instead of
      `pd.concat` inside the loop (quadratic copying);
    * genres are looked up once per artist id rather than once per track;
    * playlist entries whose `track` is None are skipped, and a track for which
      `sp.audio_features` yields None gets None in the audio columns instead of
      raising TypeError;
    * numeric columns now get inferred dtypes rather than `object`.

    NOTE(review): 'acousticness' is not collected here although
    getTrackFeatures() returns it - left unchanged to keep the CSV schema.

    Side effect: writes '<creator>-<playlist_id>.csv' in the working directory.

    Args:
        creator: owner id of the playlist.
        playlist_id: id of the playlist.

    Returns:
        pandas.DataFrame: one row per track, columns as listed below.
    """
    meta_columns = ['artist', 'album', 'track_name', 'track_id', 'popularity', 'genres']
    audio_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']
    playlist_features_list = meta_columns + audio_columns

    # Collect every playlist entry, page by page.
    tracks = []
    offset = 0
    while True:
        results = sp.user_playlist_tracks(creator, playlist_id, offset=offset)
        tracks += results['items']

        if results['next'] is not None:
            offset += 100
        else:
            break

    genres_by_artist = {}  # artist id -> genres, so each artist is requested once
    rows = []
    for item in tracks:
        track = item['track']
        if track is None:
            continue

        artist = track['album']['artists'][0]
        artist_id = artist['id']
        if artist_id not in genres_by_artist:
            genres_by_artist[artist_id] = sp.artist(artist_id)['genres']

        playlist_features = {
            'artist': artist['name'],
            'album': track['album']['name'],
            'track_name': track['name'],
            'track_id': track['id'],
            'popularity': track['popularity'],
            'genres': genres_by_artist[artist_id],
        }

        # Get audio features; the pause between requests is kept from the original.
        time.sleep(0.5)
        audio_features = sp.audio_features(playlist_features['track_id'])[0]
        for feature in audio_columns:
            playlist_features[feature] = audio_features[feature] if audio_features else None

        rows.append(playlist_features)

    playlist_df = pd.DataFrame(rows, columns=playlist_features_list)
    playlist_df.to_csv('{}-{}.csv'.format(creator, playlist_id))
    return playlist_df

In [14]:
def analysePlaylistsList(playlist_tuple_list):
    """Analyse several playlists and stack the results into one DataFrame.

    Each element of `playlist_tuple_list` is indexed as (creator, playlist_id)
    and passed to analysePlaylist(). The per-playlist frames are concatenated
    once at the end with a fresh 0..n-1 index.

    An empty input now yields an empty DataFrame; the previous version failed
    with UnboundLocalError because the result was only assigned inside the loop.

    Side effect: writes the combined frame to 'multiple playlists.csv'.

    Args:
        playlist_tuple_list: sequence of (creator, playlist_id) pairs, e.g. the
            second value returned by getUserPlaylists().

    Returns:
        pandas.DataFrame: all playlist rows stacked in input order.
    """
    frames = [analysePlaylist(entry[0], entry[1]) for entry in playlist_tuple_list]
    # TODO (carried over from the original): tag each frame with its playlist name.

    if frames:
        playlist_tuple_df = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat raises on an empty list, so build the empty result directly.
        playlist_tuple_df = pd.DataFrame()

    playlist_tuple_df.to_csv('multiple playlists.csv')
    return playlist_tuple_df

In [15]:
def _fetchAllPages(first_page):
    """Return the items of `first_page` plus those of every following page reached via `sp.next`."""
    items = list(first_page['items'])
    page = first_page
    while page.get('next'):
        page = sp.next(page)
        items.extend(page['items'])
    return items


def getDiscography(name):
    """Search for an artist by name and return a DataFrame of their tracks.

    The first artist hit of `sp.search` is used. All of that artist's releases
    of type 'album' and then of type 'single' are listed (all pages), and every
    track of every release is collected (all pages).

    Changes from the previous version:
    * the duplicated album/single loops share one code path;
    * the release name comes from the listing already fetched, instead of one
      extra `sp.album` request per release;
    * track listings are followed past the first page;
    * a search with no artist hits raises IndexError with an explanatory
      message (same exception type as before).

    Only the DataFrame is returned; the artist's genres are not part of the result.

    Side effect: writes '<artist name>-discog.csv' in the working directory.

    Args:
        name: artist name to search for.

    Returns:
        pandas.DataFrame: columns track / album / release_date / id, album
        releases first, then singles.

    Raises:
        IndexError: if the search returns no artists.
    """
    results = sp.search(q=name, type='artist')
    matches = results['artists']['items']
    if not matches:
        raise IndexError('No artist found for search term: {!r}'.format(name))
    artist = matches[0]
    artist_id = artist['id']

    # Albums first, then singles - the row order of the original implementation.
    releases = []
    for album_type in ('album', 'single'):
        releases.extend(_fetchAllPages(sp.artist_albums(artist_id, album_type=album_type)))

    rows = []
    for release in releases:
        for track in _fetchAllPages(sp.album_tracks(release['id'])):
            rows.append({
                'track': track['name'],
                'album': release['name'],
                'release_date': release['release_date'],
                'id': track['id'],
            })

    df = pd.DataFrame(rows, columns=['track', 'album', 'release_date', 'id'])
    df.to_csv('{}-discog.csv'.format(artist['name']))
    return df

In [16]:
def getTrackFeatures(id):
    """Return one flat list of metadata and audio features for a track id.

    Three requests are made: `sp.track`, `sp.audio_features` and `sp.artist`
    (for the genres of the first album artist).

    Returns:
        list: [name, album, artist, release_date, genres, length (duration_ms),
        popularity, danceability, energy, key, loudness, mode, speechiness,
        acousticness, instrumentalness, liveness, valence, tempo, time_signature]
        - the column order getTracklistFeatures() relies on.
    """
    audio_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )

    track_info = sp.track(id)
    audio_response = sp.audio_features(id)

    # Metadata: album-level fields come from the track's embedded album object.
    album_info = track_info['album']
    lead_artist = album_info['artists'][0]
    genres = sp.artist(lead_artist['id'])['genres']

    summary = [
        track_info['name'],
        album_info['name'],
        lead_artist['name'],
        album_info['release_date'],
        genres,
        track_info['duration_ms'],
        track_info['popularity'],
    ]

    # Audio features: the response is a list; its first entry holds this track's values.
    audio = audio_response[0]
    summary.extend(audio[key_name] for key_name in audio_keys)
    return summary

In [17]:
def getTracklistFeatures(tracklist):
    """Fetch features for many tracks and return them as a DataFrame.

    Calls getTrackFeatures() for each id in `tracklist`, showing a
    'Requesting track i/N' progress line that is replaced on each iteration.
    The counter is now 1-based and printed before the request is made, so the
    line names the track currently being fetched (previously it ran 0..N-1 and
    was printed after the request finished).

    Side effect: writes the result to 'tracklist-features.csv'.

    Args:
        tracklist: sized sequence of track ids.

    Returns:
        pandas.DataFrame: one row per id, columns in the order returned by
        getTrackFeatures().
    """
    columns = ['track', 'album', 'artist', 'release_date', 'genres', 'length', 'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']

    total = len(tracklist)
    tracks = []
    for position, track_id in enumerate(tracklist, start=1):
        # see status
        print('Requesting track {}/{}'.format(position, total))
        # wait=True defers the clear until the next output arrives, so the
        # status line stays visible while the request is in flight
        clear_output(wait=True)
        tracks.append(getTrackFeatures(track_id))

    # create dataset
    df = pd.DataFrame(tracks, columns=columns)
    df.to_csv("tracklist-features.csv", sep = ',')
    return df

In [18]:
def getUriFromDf(df):
    """Look up a Spotify track URI for each (artist, track) row of `df`.

    For every row a one-result track search is issued and the URI of the hit is
    collected. Rows with no search hit are skipped.

    Fix: previously a miss executed `i += 1` (which has no effect on a `for`
    loop) and then appended the `uri` left over from the preceding row - a
    silent duplicate - or raised NameError when the very first row missed.

    Args:
        df: DataFrame with 'artist' and 'track' columns.

    Returns:
        list: URIs of the rows that produced a hit, in row order. The list can
        therefore be shorter than `df`.
    """
    saved_uris = []
    artist_names = df['artist'].values
    track_names = df['track'].values

    for i, (artist, track) in enumerate(zip(artist_names, track_names)):
        print(f'{i} - {artist}: {track}')
        # NOTE(review): the query has a space after 'track:' - confirm the search
        # endpoint still treats it as a field filter; left unchanged here.
        q = 'artist:{} track: {}'.format(artist, track)
        results = sp.search(q=q, limit=1, type='track')
        hits = results['tracks']['items']
        if hits:
            saved_uris.append(hits[0]['uri'])

    return saved_uris

In [19]:
# my_playlists_df, my_playlists_list = getUserPlaylists(username)

In [20]:
# scrobbles_uri = getUriFromDf(scrobbles_unique)

In [21]:
# l = list(csv.reader(open('../spotify/Spreadsheets/scrobbles_unique_uris.csv', 'r')))
# mylist = map(list, zip(*l[0:])) # transpose list
# scrobble_uris = list(chain.from_iterable(mylist))

In [22]:
# np.savetxt("scrobbles_uri_unique.csv", saved_uris, delimiter=",", fmt='%s')

In [23]:
# scrobble_features = getTracklistFeatures(scrobble_uris)
# scrobble_features.to_csv('scrobbles_features.csv', index=False)

In [24]:
# refresh token
# Re-runs getToken() and rebinds the global `sp` client that the helper functions read.
sp = getToken()

In [230]:
# Load the scrobble export and drop the columns that are not used below,
# in a single chained expression.
scrobbles = (
    pd.read_csv('/home/sid/development/python/music-analysis/lastfm/Spreadsheets/SidSaxena_scrobbles_2020-08-25.csv')
    .drop(columns=['album', 'artist_mbid', 'album_mbid', 'track_mbid', 'timestamp'])
)
scrobbles

Unnamed: 0,artist,track,datetime
0,Matt Berninger,Serpentine Prison,2020-08-25 11:25:07
1,Geotic,Gondolier,2020-08-25 11:19:53
2,Sticky Fingers,Rum Rage,2020-08-25 11:15:45
3,Declan McKenna,In Blue,2020-08-25 11:10:56
4,Winnetka Bowling League,On The 5,2020-08-25 11:08:02
...,...,...,...
82901,XYLØ,Dead End Love,2016-10-08 06:51:20
82902,XYLØ,Dead End Love,2016-10-08 06:48:06
82903,XYLØ,Dead End Love,2016-10-08 06:45:48
82904,Lauv,Breathe,2016-10-08 06:43:52


In [26]:
# Load the "unique scrobbles" export; the displayed output below shows `artist` and `track` columns.
# NOTE(review): this is the frame the commented-out getUriFromDf(scrobbles_unique) cell refers to.
scrobbles_unique = pd.read_csv('/home/sid/development/python/music-analysis/lastfm/Spreadsheets/SidSaxena_unique_scrobbles_2020-08-25.csv')
scrobbles_unique


Unnamed: 0,artist,track
0,Matt Berninger,Serpentine Prison
1,Geotic,Gondolier
2,Sticky Fingers,Rum Rage
3,Declan McKenna,In Blue
4,Winnetka Bowling League,On The 5
...,...,...
15707,Tsar B,Escalate
15708,Hydrogen Sea,Only Oleanders
15709,Hydrogen Sea,Wear Out
15710,Jacm,Bon Iver - Perth (JacM Chillstep Remix)


In [64]:
# Load the saved per-track features, drop exact duplicate rows and rename a
# `name` column (if present) to `track`, all in one chain without in-place mutation.
scrobbles_features = (
    pd.read_csv('/home/sid/development/python/music-analysis/spotify/Spreadsheets/scrobbles_features.csv')
    .drop_duplicates()
    .rename(columns={'name':'track'})
)
scrobbles_features

Unnamed: 0,track,album,artist,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Serpentine Prison,Serpentine Prison,Matt Berninger,2020-05-20,[],272280,55,0.608,0.377,11,-11.553,1,0.0329,0.3780,0.149000,0.1080,0.370,110.079,4
1,Gondolier,Traversa,Geotic,2018-10-19,['experimental ambient'],319500,30,0.874,0.441,11,-9.976,1,0.1160,0.6120,0.812000,0.0853,0.541,120.001,4
2,Rum Rage,Land Of Pleasure,Sticky Fingers,2014-08-01,"['australian reggae fusion', 'reggae fusion']",252299,67,0.594,0.273,5,-9.402,1,0.0291,0.8730,0.000000,0.0895,0.362,131.851,4
3,In Blue,MOOMINVALLEY (Official Soundtrack),Various Artists,2019-04-19,[],294021,53,0.578,0.456,0,-10.155,1,0.0243,0.1680,0.003060,0.2020,0.276,96.979,4
4,On The 5,On The 5,Winnetka Bowling League,2018-08-10,"['indie pop', 'la pop', 'modern rock']",179840,53,0.556,0.602,11,-5.757,0,0.0299,0.6660,0.000059,0.1040,0.402,119.919,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15683,Turn Red,Turn Red,PMtoyou,2014-04-27,[],261906,8,0.624,0.681,2,-9.107,1,0.0297,0.5470,0.790000,0.1090,0.524,159.036,3
15691,Worry,In Dreams,Hydrogen Sea,2016-09-16,"['belgian indie', 'etherpop']",226733,19,0.645,0.785,10,-8.201,0,0.0724,0.0601,0.095500,0.1090,0.429,97.013,4
15692,Escalate,Tsar B,Tsar B,2016-08-26,['belgian pop'],225506,54,0.455,0.373,5,-9.337,0,0.0491,0.1060,0.001020,0.2170,0.149,112.062,4
15693,Only Oleanders,Only Oleanders,Hydrogen Sea,2014-11-18,"['belgian indie', 'etherpop']",191560,3,0.487,0.507,5,-11.661,0,0.0569,0.1130,0.002020,0.2210,0.450,88.753,4


In [286]:
# Attach the audio features to every scrobble.
# The join keys are spelled out: previously pd.merge used whatever columns the
# two frames happened to share, so adding a column to either frame would have
# silently changed the join. `artist` and `track` are the only shared columns
# in the frames displayed above, so the result is unchanged.
# The join is an inner join: scrobbles without a matching feature row are dropped.
merged = (
    pd.merge(scrobbles, scrobbles_features, how='inner', on=['artist', 'track'])
    .sort_values('datetime', ascending=False)  # descending by the `datetime` column
)
merged.to_csv('Scrobbles with Features (56K).csv', index=False)

In [287]:
# Display the merged scrobbles-with-features frame as this cell's output.
merged

Unnamed: 0,artist,track,datetime,album,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Matt Berninger,Serpentine Prison,2020-08-25 11:25:07,Serpentine Prison,2020-05-20,[],272280,55,0.608,0.377,11,-11.553,1,0.0329,0.378,0.149000,0.1080,0.370,110.079,4
2,Geotic,Gondolier,2020-08-25 11:19:53,Traversa,2018-10-19,['experimental ambient'],319500,30,0.874,0.441,11,-9.976,1,0.1160,0.612,0.812000,0.0853,0.541,120.001,4
4,Sticky Fingers,Rum Rage,2020-08-25 11:15:45,Land Of Pleasure,2014-08-01,"['australian reggae fusion', 'reggae fusion']",252299,67,0.594,0.273,5,-9.402,1,0.0291,0.873,0.000000,0.0895,0.362,131.851,4
6,Winnetka Bowling League,On The 5,2020-08-25 11:08:02,On The 5,2018-08-10,"['indie pop', 'la pop', 'modern rock']",179840,53,0.556,0.602,11,-5.757,0,0.0299,0.666,0.000059,0.1040,0.402,119.919,4
8,Mad Season,Wake Up,2020-08-25 11:00:31,Above (Deluxe Edition),1995,"['alternative metal', 'alternative rock', 'blu...",456026,54,0.339,0.276,7,-10.746,1,0.0285,0.141,0.009060,0.1000,0.308,83.181,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43431,Michl,Gone,2016-10-08 07:02:33,Michl,2016-08-12,"['electropop', 'indie poptimism', 'vapor soul']",172000,45,0.658,0.525,10,-6.903,0,0.1500,0.556,0.000058,0.0883,0.265,82.006,4
24898,XYLØ,Dead End Love,2016-10-08 06:51:20,Dead End Love,2016-09-30,"['electropop', 'indie electropop', 'indie popt...",227765,26,0.589,0.765,6,-5.755,0,0.0351,0.021,0.000079,0.1990,0.667,151.958,4
24899,XYLØ,Dead End Love,2016-10-08 06:48:06,Dead End Love,2016-09-30,"['electropop', 'indie electropop', 'indie popt...",227765,26,0.589,0.765,6,-5.755,0,0.0351,0.021,0.000079,0.1990,0.667,151.958,4
24900,XYLØ,Dead End Love,2016-10-08 06:45:48,Dead End Love,2016-09-30,"['electropop', 'indie electropop', 'indie popt...",227765,26,0.589,0.765,6,-5.755,0,0.0351,0.021,0.000079,0.1990,0.667,151.958,4
