In [164]:
# spotipy modules
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

In [165]:
# other libraries
from decouple import config 
import time
from IPython.core.display import clear_output
import csv
from itertools import chain
import string

In [166]:
# visualisation libraries
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [167]:
# environment variables
# Spotify application credentials, looked up by name through decouple's `config`
# so they are not hard-coded in the notebook.
client_id = config('SPOTIPY_CLIENT_ID')
client_secret = config('SPOTIPY_CLIENT_SECRET')
# Redirect URI handed to util.prompt_for_user_token() inside getToken().
redirect_uri = 'http://google.com/'

In [168]:
# authorization
# Client-credentials manager built from the app id/secret.
# NOTE(review): not referenced again in the cells shown here; getToken()
# builds the client from a user token instead.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

In [169]:
# Spotify username; read by getToken() for the token prompt and its error message.
username = 'sidsaxena'

In [170]:
# Space-separated scope names passed as `scope` to util.prompt_for_user_token() in getToken().
scope = 'ugc-image-upload user-read-playback-state user-modify-playback-state user-read-currently-playing streaming app-remote-control user-read-email user-read-private playlist-read-collaborative playlist-modify-public playlist-read-private playlist-modify-private user-library-modify user-library-read user-top-read user-read-playback-position user-read-recently-played user-follow-read user-follow-modify'

In [171]:
def getToken():
    """Authenticate `username` against Spotify and return an authorised client.

    Requests a user token through ``util.prompt_for_user_token`` using the
    notebook-level ``username``, ``scope``, ``client_id``, ``client_secret``
    and ``redirect_uri`` values, then wraps the token in ``spotipy.Spotify``.

    Returns:
        spotipy.Spotify: client constructed with the obtained token.

    Raises:
        Exception: whatever the token request or client construction raised.
            The failure is reported on stdout and then re-raised.
    """
    try:
        token = util.prompt_for_user_token(
            username=username,
            scope=scope,
            client_id=client_id,
            client_secret=client_secret,
            redirect_uri=redirect_uri,
            cache_path='/home/sid/development/python/music-analysis/spotify/.cache-sidsaxena',
        )
        return spotipy.Spotify(auth=token)
    except Exception:
        # A bare `except:` used to fall through to `return sp` with `sp`
        # unbound, hiding the real error behind an UnboundLocalError.
        print('Token not accessible for user: ', username)
        raise

In [172]:
# Module-level Spotify client; every helper function below reads this global `sp`.
sp = getToken()

In [173]:
def getUserPlaylists(user):
    """List every playlist of `user` and save the listing to CSV.

    Pages through ``sp.user_playlists`` in steps of 50 until the response
    reports no further page, then writes the table to
    ``'<user>-playlists.csv'`` in the working directory.

    Returns:
        tuple: ``(playlist_df, pairs)`` where ``playlist_df`` has the columns
        ``name``, ``id`` and ``creator`` and ``pairs`` is a list of
        ``(creator_id, playlist_id)`` tuples in the same order.
    """
    collected = []
    page_start = 0
    while True:
        page = sp.user_playlists(user, offset=page_start)
        collected.extend(page['items'])
        if page['next'] is None:
            break
        page_start += 50

    names = [entry['name'] for entry in collected]
    playlist_ids = [entry['id'] for entry in collected]
    owners = [entry['owner']['id'] for entry in collected]

    playlist_df = pd.DataFrame({'name': names, 'id': playlist_ids, 'creator': owners})
    playlist_df.to_csv('{}-playlists.csv'.format(user))

    return playlist_df, list(zip(owners, playlist_ids))

In [174]:
def getTrackIds(user, playlist_id):
    """Return the ids of all tracks in a playlist.

    Follows the ``next`` link of the playlist's track listing with
    ``sp.next`` so that every page is read, not only the first one embedded
    in the playlist object.

    Args:
        user: Owner of the playlist, passed to ``sp.user_playlist``.
        playlist_id: Id of the playlist to read.

    Returns:
        list: track ids in playlist order. Entries whose ``track`` is
        ``None`` are skipped.
    """
    ids = []
    page = sp.user_playlist(user, playlist_id)['tracks']
    while page:
        for item in page['items']:
            track = item['track']
            # A missing track object cannot be indexed, so skip it.
            if track is not None:
                ids.append(track['id'])
        page = sp.next(page) if page['next'] else None
    return ids

# ids = getTrackIds(my_user, vfar_id)

In [175]:
def getTrackIdsFromAlbum(album_id):
    """Return the ids of all tracks on an album.

    Reads ``sp.album_tracks`` and follows its ``next`` link with ``sp.next``
    so that every page is included. The earlier ``sp.album`` lookup was
    dropped because its result was never used.

    Args:
        album_id: Id of the album to read.

    Returns:
        list: track ids in album order.
    """
    track_id_list = []
    page = sp.album_tracks(album_id)
    while page:
        track_id_list.extend(track['id'] for track in page['items'])
        page = sp.next(page) if page['next'] else None
    return track_id_list

In [176]:
def analysePlaylist(creator, playlist_id):
    """Collect metadata and audio features for every track of a playlist.

    Pages through ``sp.user_playlist_tracks`` in steps of 100, then for each
    track looks up the genres of the album's first artist and the track's
    audio features. The table is written to ``'<creator>-<playlist_id>.csv'``
    in the working directory.

    Args:
        creator: Owner of the playlist.
        playlist_id: Id of the playlist to analyse.

    Returns:
        pandas.DataFrame: one row per track, columns as listed in
        ``playlist_features_list``. Entries whose ``track`` is ``None`` are
        skipped; a track without audio features gets ``None`` in the
        audio-feature columns.
    """
    playlist_features_list = ['artist','album','track_name', 'track_id', 'popularity', 'genres', 'danceability','energy','key','loudness','mode', 'speechiness','instrumentalness','liveness','valence','tempo', 'duration_ms','time_signature']
    # Everything after 'genres' is copied from the audio-features response.
    audio_feature_names = playlist_features_list[playlist_features_list.index('genres') + 1:]

    tracks = []
    offset = 0
    while True:
        results = sp.user_playlist_tracks(creator, playlist_id, offset=offset)
        tracks += results['items']
        if results['next'] is None:
            break
        offset += 100

    # Build plain records and create the frame once; concatenating a
    # one-row frame per track is quadratic and leaves object dtypes.
    rows = []
    for item in tracks:
        track = item['track']
        if track is None:
            continue

        artist = track['album']['artists'][0]
        genres = sp.artist(artist['id'])['genres']

        row = {
            'artist': artist['name'],
            'album': track['album']['name'],
            'track_name': track['name'],
            'track_id': track['id'],
            'popularity': track['popularity'],
            'genres': genres,
        }

        # Pause between audio-feature requests.
        time.sleep(0.5)
        audio_features = sp.audio_features(row['track_id'])[0]
        if audio_features is None:
            row.update(dict.fromkeys(audio_feature_names))
        else:
            for feature in audio_feature_names:
                row[feature] = audio_features[feature]
        rows.append(row)

    playlist_df = pd.DataFrame(rows, columns=playlist_features_list)
    playlist_df.to_csv('{}-{}.csv'.format(creator, playlist_id))
    return playlist_df

In [177]:
def analysePlaylistsList(playlist_tuple_list):
    """Analyse several playlists and stack the results into one table.

    Each entry is passed to ``analysePlaylist`` as ``(entry[0], entry[1])``,
    i.e. ``(creator, playlist_id)``. The combined table is written to
    ``'multiple playlists.csv'`` in the working directory.

    Args:
        playlist_tuple_list: Sequence of ``(creator, playlist_id)`` entries.

    Returns:
        pandas.DataFrame: all per-playlist frames concatenated with a fresh
        index; an empty frame when no playlists were given.
    """
    frames = [analysePlaylist(entry[0], entry[1]) for entry in playlist_tuple_list]

    # pd.concat rejects an empty list, so fall back to an empty frame.
    if frames:
        playlist_tuple_df = pd.concat(frames, ignore_index=True)
    else:
        playlist_tuple_df = pd.DataFrame()

    playlist_tuple_df.to_csv('multiple playlists.csv')
    return playlist_tuple_df

In [178]:
def getDiscography(name):
    """Build a track-level discography for the first artist matching `name`.

    Searches Spotify for `name`, takes the first artist hit, then walks every
    release of type ``album`` followed by every release of type ``single``
    and records one row per track. The table is also written to
    ``'<artist name>-discog.csv'`` in the working directory.

    Args:
        name: Artist name used as the search query.

    Returns:
        pandas.DataFrame: columns ``track``, ``album``, ``release_date`` and
        ``id`` (the track id); album tracks first, then singles. Only the
        frame is returned; artist genres are not part of the result.

    Raises:
        IndexError: if the search returns no artist.
    """

    def all_items(page):
        """Collect the items of `page` and of every page that follows it."""
        items = list(page['items'])
        while page['next']:
            page = sp.next(page)
            items.extend(page['items'])
        return items

    search_hits = sp.search(q=name, type='artist')['artists']['items']
    if not search_hits:
        raise IndexError('No Spotify artist found for {!r}'.format(name))
    artist = search_hits[0]

    rows = []
    for release_type in ('album', 'single'):
        releases = all_items(sp.artist_albums(artist['id'], album_type=release_type))
        for release in releases:
            # Name and release date come from the release listing itself,
            # so no extra sp.album() request is needed per release.
            for track in all_items(sp.album_tracks(release['id'])):
                rows.append({
                    'track': track['name'],
                    'album': release['name'],
                    'release_date': release['release_date'],
                    'id': track['id'],
                })

    df = pd.DataFrame(rows, columns=['track', 'album', 'release_date', 'id'])
    df.to_csv('{}-discog.csv'.format(artist['name']))
    return df

In [179]:
def getTrackFeatures(id):
    """Return metadata and audio features of one track as a flat list.

    The list holds, in order: track name, album name, first album artist,
    album release date, that artist's genres, duration in ms, popularity,
    followed by the audio features named in ``audio_keys``.
    """
    track_info = sp.track(id)
    audio = sp.audio_features(id)

    album_info = track_info['album']
    lead_artist = album_info['artists'][0]
    genres = sp.artist(lead_artist['id'])['genres']

    descriptive = [
        track_info['name'],
        album_info['name'],
        lead_artist['name'],
        album_info['release_date'],
        genres,
        track_info['duration_ms'],
        track_info['popularity'],
    ]

    audio_keys = (
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'time_signature',
    )
    first_result = audio[0]
    return descriptive + [first_result[key_name] for key_name in audio_keys]

In [180]:
def getTracklistFeatures(tracklist):
    """Fetch features for several tracks and return them as a DataFrame.

    Calls ``getTrackFeatures`` once per id, printing a 1-based progress line
    that is cleared before the next one is shown. The table is also written
    to ``'tracklist-features.csv'`` in the working directory.

    Args:
        tracklist: Sized iterable of track ids.

    Returns:
        pandas.DataFrame: one row per id, in input order.
    """
    total = len(tracklist)
    tracks = []
    # Iterate the ids directly (no positional indexing) and count from 1 so
    # the last progress line reads N/N instead of N-1/N.
    for position, track_id in enumerate(tracklist, start=1):
        tracks.append(getTrackFeatures(track_id))

        # see status
        print('Requesting track {}/{}'.format(position, total))
        # clear output
        clear_output(wait=True)

    # create dataset
    df = pd.DataFrame(tracks, columns = ['track', 'album', 'artist', 'release_date', 'genres', 'length', 'popularity', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature'])
    df.to_csv("tracklist-features.csv", sep = ',')
    return df

In [181]:
def getUriFromDf(df):
    """Search Spotify for every (artist, track) row and collect the hit URIs.

    Args:
        df: DataFrame with ``artist`` and ``track`` columns.

    Returns:
        list: URI of the first search hit for each row that produced one,
        in row order. Rows without a hit contribute nothing, so the list can
        be shorter than `df`.
    """
    saved_uris = []
    artist_names = df['artist'].values
    track_names = df['track'].values

    for i, (artist, track) in enumerate(zip(artist_names, track_names)):
        print(f'{i} - {artist}: {track}')
        q = 'artist:{} track: {}'.format(artist, track)
        results = sp.search(q=q, limit=1, type='track')
        hits = results['tracks']['items']
        # Append only on a hit. A miss used to re-append the previous row's
        # URI, or fail with an unbound `uri` on the first row.
        if hits:
            saved_uris.append(hits[0]['uri'])

    return saved_uris

In [182]:
# my_playlists_df, my_playlists_list = getUserPlaylists(username)

In [183]:
# scrobbles_uri = getUriFromDf(scrobbles_unique)

In [184]:
# l = list(csv.reader(open('../spotify/Spreadsheets/scrobbles_unique_uris.csv', 'r')))
# mylist = map(list, zip(*l[0:])) # transpose list
# scrobble_uris = list(chain.from_iterable(mylist))

In [185]:
# np.savetxt("scrobbles_uri_unique.csv", saved_uris, delimiter=",", fmt='%s')

In [186]:
# scrobbles_features = getTracklistFeatures(scrobble_uris)
# scrobbles_features.to_csv('scrobbles_features.csv', index=False)

In [187]:
# refresh token
# Calls getToken() again and rebinds the global `sp` that the helper functions read.
sp = getToken()

In [188]:
# Load the scrobble export and discard the album, `*_mbid` and timestamp columns.
scrobbles = pd.read_csv(
    '/home/sid/development/python/music-analysis/lastfm/Spreadsheets/SidSaxena_scrobbles_2020-08-25.csv'
).drop(columns=['album', 'artist_mbid', 'album_mbid', 'track_mbid', 'timestamp'])

In [189]:
# Normalise capitalisation with string.capwords so artist/track text can be
# compared with the feature table, which gets the same treatment further down.
scrobbles['artist'] = scrobbles['artist'].map(string.capwords)
scrobbles['track'] = scrobbles['track'].map(string.capwords)

In [190]:
# Display the scrobble table after the capwords normalisation.
scrobbles

Unnamed: 0,artist,track,datetime
0,Matt Berninger,Serpentine Prison,2020-08-25 11:25:07
1,Geotic,Gondolier,2020-08-25 11:19:53
2,Sticky Fingers,Rum Rage,2020-08-25 11:15:45
3,Declan Mckenna,In Blue,2020-08-25 11:10:56
4,Winnetka Bowling League,On The 5,2020-08-25 11:08:02
...,...,...,...
82901,Xylø,Dead End Love,2016-10-08 06:51:20
82902,Xylø,Dead End Love,2016-10-08 06:48:06
82903,Xylø,Dead End Love,2016-10-08 06:45:48
82904,Lauv,Breathe,2016-10-08 06:43:52


In [191]:
# Load the "unique scrobbles" export and display it.
# No capwords step is applied to this frame.
scrobbles_unique = pd.read_csv('/home/sid/development/python/music-analysis/lastfm/Spreadsheets/SidSaxena_unique_scrobbles_2020-08-25.csv')
scrobbles_unique

Unnamed: 0,artist,track
0,Matt Berninger,Serpentine Prison
1,Geotic,Gondolier
2,Sticky Fingers,Rum Rage
3,Declan McKenna,In Blue
4,Winnetka Bowling League,On The 5
...,...,...
15707,Tsar B,Escalate
15708,Hydrogen Sea,Only Oleanders
15709,Hydrogen Sea,Wear Out
15710,Jacm,Bon Iver - Perth (JacM Chillstep Remix)


In [192]:
# Load the previously saved feature table, drop exact duplicate rows and
# rename a `name` column (if present) to `track`.
scrobbles_features = (
    pd.read_csv('/home/sid/development/python/music-analysis/spotify/Spreadsheets/scrobbles_features.csv')
    .drop_duplicates()
    .rename(columns={'name': 'track'})
)
scrobbles_features

Unnamed: 0,track,album,artist,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Serpentine Prison,Serpentine Prison,Matt Berninger,2020-05-20,[],272280,55,0.608,0.377,11,-11.553,1,0.0329,0.3780,0.149000,0.1080,0.370,110.079,4
1,Gondolier,Traversa,Geotic,2018-10-19,['experimental ambient'],319500,30,0.874,0.441,11,-9.976,1,0.1160,0.6120,0.812000,0.0853,0.541,120.001,4
2,Rum Rage,Land Of Pleasure,Sticky Fingers,2014-08-01,"['australian reggae fusion', 'reggae fusion']",252299,67,0.594,0.273,5,-9.402,1,0.0291,0.8730,0.000000,0.0895,0.362,131.851,4
3,In Blue,MOOMINVALLEY (Official Soundtrack),Various Artists,2019-04-19,[],294021,53,0.578,0.456,0,-10.155,1,0.0243,0.1680,0.003060,0.2020,0.276,96.979,4
4,On The 5,On The 5,Winnetka Bowling League,2018-08-10,"['indie pop', 'la pop', 'modern rock']",179840,53,0.556,0.602,11,-5.757,0,0.0299,0.6660,0.000059,0.1040,0.402,119.919,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12480,Turn Red,Turn Red,Pmtoyou,2014-04-27,[],261906,8,0.624,0.681,2,-9.107,1,0.0297,0.5470,0.790000,0.1090,0.524,159.036,3
12481,Worry,In Dreams,Hydrogen Sea,2016-09-16,"['belgian indie', 'etherpop']",226733,19,0.645,0.785,10,-8.201,0,0.0724,0.0601,0.095500,0.1090,0.429,97.013,4
12482,Escalate,Tsar B,Tsar B,2016-08-26,['belgian pop'],225506,54,0.455,0.373,5,-9.337,0,0.0491,0.1060,0.001020,0.2170,0.149,112.062,4
12483,Only Oleanders,Only Oleanders,Hydrogen Sea,2014-11-18,"['belgian indie', 'etherpop']",191560,3,0.487,0.507,5,-11.661,0,0.0569,0.1130,0.002020,0.2210,0.450,88.753,4


In [193]:
# # add seatbelts features to scrobbles features
# seatbelts = scrobbles_unique[scrobbles_unique['artist'] == 'The Seatbelts']

# seatbelts_uri = []
# artist = seatbelts['artist'].to_list()
# tracks = seatbelts['track'].to_list()

# l = [i.split('The ', 1)[1] for i in artist]

# for i in range(len(l)):
#     artist = l[i]
#     track = tracks[i]
#     q = 'artist:{} track: {}'.format(artist, track)
#     print(f'{i} - {artist}: {track}')
#     results = sp.search(q=q, limit=1, type='track')
#     if len(results['tracks']['items']) > 0:
#         uri = results['tracks']['items'][0]['uri']
#         seatbelts_uri.append(uri)
#     else: 
#         i += 1

# seatbelts_features = getTracklistFeatures(seatbelts_uri)
# scrobbles_features.append(seatbelts_features)

In [225]:
# Apply the same string.capwords normalisation that `scrobbles` received,
# so artist/track values in both frames are capitalised the same way.
scrobbles_features['artist'] = scrobbles_features['artist'].map(string.capwords)
scrobbles_features['track'] = scrobbles_features['track'].map(string.capwords)

In [226]:
# Inner join of the scrobble history with the feature table, newest first.
# No `on=` is given, so pandas joins on every column name the frames share.
merged = (
    scrobbles
    .merge(scrobbles_features)
    .sort_values('datetime', ascending=False)
)

In [196]:
# merged.to_csv('Scrobbles with Features (66K).csv', index=False)

In [227]:
# Display the joined table (scrobbles that found a matching feature row).
merged

Unnamed: 0,artist,track,datetime,album,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Matt Berninger,Serpentine Prison,2020-08-25 11:25:07,Serpentine Prison,2020-05-20,[],272280,55,0.608,0.377,11,-11.553,1,0.0329,0.378,0.149000,0.1080,0.370,110.079,4
2,Geotic,Gondolier,2020-08-25 11:19:53,Traversa,2018-10-19,['experimental ambient'],319500,30,0.874,0.441,11,-9.976,1,0.1160,0.612,0.812000,0.0853,0.541,120.001,4
4,Sticky Fingers,Rum Rage,2020-08-25 11:15:45,Land Of Pleasure,2014-08-01,"['australian reggae fusion', 'reggae fusion']",252299,67,0.594,0.273,5,-9.402,1,0.0291,0.873,0.000000,0.0895,0.362,131.851,4
6,Winnetka Bowling League,On The 5,2020-08-25 11:08:02,On The 5,2018-08-10,"['indie pop', 'la pop', 'modern rock']",179840,53,0.556,0.602,11,-5.757,0,0.0299,0.666,0.000059,0.1040,0.402,119.919,4
8,Mad Season,Wake Up,2020-08-25 11:00:31,Above (Deluxe Edition),1995,"['alternative metal', 'alternative rock', 'blu...",456026,54,0.339,0.276,7,-10.746,1,0.0285,0.141,0.009060,0.1000,0.308,83.181,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51454,Michl,Gone,2016-10-08 07:02:33,Michl,2016-08-12,"['electropop', 'indie poptimism', 'vapor soul']",172000,45,0.658,0.525,10,-6.903,0,0.1500,0.556,0.000058,0.0883,0.265,82.006,4
29415,Xylø,Dead End Love,2016-10-08 06:51:20,Dead End Love,2016-09-30,"['electropop', 'indie electropop', 'indie popt...",227765,26,0.589,0.765,6,-5.755,0,0.0351,0.021,0.000079,0.1990,0.667,151.958,4
29416,Xylø,Dead End Love,2016-10-08 06:48:06,Dead End Love,2016-09-30,"['electropop', 'indie electropop', 'indie popt...",227765,26,0.589,0.765,6,-5.755,0,0.0351,0.021,0.000079,0.1990,0.667,151.958,4
29417,Xylø,Dead End Love,2016-10-08 06:45:48,Dead End Love,2016-09-30,"['electropop', 'indie electropop', 'indie popt...",227765,26,0.589,0.765,6,-5.755,0,0.0351,0.021,0.000079,0.1990,0.667,151.958,4


In [198]:
# Outer join on the shared columns: keeps scrobbles without features and
# feature rows without scrobbles, with NaN filling the missing side.
mergeunion = scrobbles.merge(scrobbles_features, how='outer')

In [199]:
# mergeunion.to_csv('Merged Union of Scrobbles and Features.csv', index=False)

In [200]:
# Display the outer-join result.
mergeunion

Unnamed: 0,artist,track,datetime,album,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,Matt Berninger,Serpentine Prison,2020-08-25 11:25:07,Serpentine Prison,2020-05-20,[],272280.0,55.0,0.608,0.377,11.0,-11.553,1.0,0.0329,0.3780,0.149000,0.1080,0.3700,110.079,4.0
1,Matt Berninger,Serpentine Prison,2020-07-05 20:14:41,Serpentine Prison,2020-05-20,[],272280.0,55.0,0.608,0.377,11.0,-11.553,1.0,0.0329,0.3780,0.149000,0.1080,0.3700,110.079,4.0
2,Geotic,Gondolier,2020-08-25 11:19:53,Traversa,2018-10-19,['experimental ambient'],319500.0,30.0,0.874,0.441,11.0,-9.976,1.0,0.1160,0.6120,0.812000,0.0853,0.5410,120.001,4.0
3,Geotic,Gondolier,2020-07-29 22:36:04,Traversa,2018-10-19,['experimental ambient'],319500.0,30.0,0.874,0.441,11.0,-9.976,1.0,0.1160,0.6120,0.812000,0.0853,0.5410,120.001,4.0
4,Sticky Fingers,Rum Rage,2020-08-25 11:15:45,Land Of Pleasure,2014-08-01,"['australian reggae fusion', 'reggae fusion']",252299.0,67.0,0.594,0.273,5.0,-9.402,1.0,0.0291,0.8730,0.000000,0.0895,0.3620,131.851,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83961,Various Artists,Überflieger,,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],530750.0,25.0,0.649,0.864,10.0,-6.659,0.0,0.0497,0.0172,0.864000,0.0308,0.5380,127.987,4.0
83962,Various Artists,Slumber Party,,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],233587.0,37.0,0.533,0.936,1.0,-4.626,1.0,0.0447,0.0143,0.721000,0.0622,0.4210,128.018,4.0
83963,Various Artists,Ragtime Cat (feat. Lilja Bloom),,"Electro Swing Fever, Vol. 3",2014-02-17,[],181000.0,22.0,0.736,0.715,5.0,-4.114,0.0,0.0412,0.0120,0.000796,0.2180,0.8510,119.976,4.0
83964,Yoe Mase,Nothing More (deaf Kev Remix),,Nothing More (Deaf Kev Remix),2015-11-06,"['chillstep', 'pop edm']",376200.0,37.0,0.383,0.831,1.0,-5.063,1.0,0.0511,0.0123,0.066100,0.1020,0.0912,199.850,4.0


In [201]:
# Rows of the outer join that did not match on both sides.
# The merge indicator is used instead of
# concat([mergeunion, merged, merged]).drop_duplicates(keep=False), because
# that trick also discards unmatched rows that are duplicated among themselves.
failed = pd.merge(scrobbles, scrobbles_features, how='outer', indicator=True)
failed = failed[failed['_merge'] != 'both'].drop(columns='_merge')

In [202]:
# failed.to_csv("Failed Results (Union minus Merged).csv", index=False)

In [203]:
# Display the rows of the outer join that have no counterpart in `merged`.
failed

Unnamed: 0,artist,track,datetime,album,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
6,Declan Mckenna,In Blue,2020-08-25 11:10:56,,,,,,,,,,,,,,,,,
7,Declan Mckenna,In Blue,2020-08-07 22:31:03,,,,,,,,,,,,,,,,,
8,Declan Mckenna,In Blue,2020-03-27 23:02:03,,,,,,,,,,,,,,,,,
126,Everything Everything,Don't Let It Bring You Down - Live At Bbc Maid...,2020-08-24 13:17:44,,,,,,,,,,,,,,,,,
130,Gotham Knights,World Premiere Trailer,2020-08-24 10:37:20,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83961,Various Artists,Überflieger,,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],530750.0,25.0,0.649,0.864,10.0,-6.659,0.0,0.0497,0.0172,0.864000,0.0308,0.5380,127.987,4.0
83962,Various Artists,Slumber Party,,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],233587.0,37.0,0.533,0.936,1.0,-4.626,1.0,0.0447,0.0143,0.721000,0.0622,0.4210,128.018,4.0
83963,Various Artists,Ragtime Cat (feat. Lilja Bloom),,"Electro Swing Fever, Vol. 3",2014-02-17,[],181000.0,22.0,0.736,0.715,5.0,-4.114,0.0,0.0412,0.0120,0.000796,0.2180,0.8510,119.976,4.0
83964,Yoe Mase,Nothing More (deaf Kev Remix),,Nothing More (Deaf Kev Remix),2015-11-06,"['chillstep', 'pop edm']",376200.0,37.0,0.383,0.831,1.0,-5.063,1.0,0.0511,0.0123,0.066100,0.1020,0.0912,199.850,4.0


In [204]:
# Unmatched rows with no `length` value, i.e. no feature data attached.
failed_not_features = failed.loc[failed['length'].isna()]

In [205]:
# failed_not_features.to_csv('Failed Scrobbles without Features.csv', index=False)

In [206]:
# Display the unmatched rows that carry no feature data.
failed_not_features

Unnamed: 0,artist,track,datetime,album,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
6,Declan Mckenna,In Blue,2020-08-25 11:10:56,,,,,,,,,,,,,,,,,
7,Declan Mckenna,In Blue,2020-08-07 22:31:03,,,,,,,,,,,,,,,,,
8,Declan Mckenna,In Blue,2020-03-27 23:02:03,,,,,,,,,,,,,,,,,
126,Everything Everything,Don't Let It Bring You Down - Live At Bbc Maid...,2020-08-24 13:17:44,,,,,,,,,,,,,,,,,
130,Gotham Knights,World Premiere Trailer,2020-08-24 10:37:20,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82907,Unday Records,Hydrogen Sea - Free Falling,2016-10-08 10:53:04,,,,,,,,,,,,,,,,,
82908,Unday Records,Hydrogen Sea - Lead Us Home,2016-10-08 10:49:37,,,,,,,,,,,,,,,,,
82909,Unday Records,Hydrogen Sea - Another Skin,2016-10-08 10:37:19,,,,,,,,,,,,,,,,,
82914,Jacm,Bon Iver - Perth (jacm Chillstep Remix),2016-10-08 07:42:08,,,,,,,,,,,,,,,,,


In [207]:
# Unmatched rows that do carry feature data, without the `datetime` column.
failed_features = failed.loc[failed['length'].notna()].drop(columns='datetime')

In [208]:
# failed_features.to_csv('Failed Features.csv', index=False)

In [209]:
# Display the unmatched feature rows.
failed_features

Unnamed: 0,artist,track,album,release_date,genres,length,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
82916,Various Artists,In Blue,MOOMINVALLEY (Official Soundtrack),2019-04-19,[],294021.0,53.0,0.578,0.456,0.0,-10.155,1.0,0.0243,0.16800,0.003060,0.2020,0.2760,96.979,4.0
82917,Various Artists,See You Space Cowboys Not Final Mountain Root,COWBOY BEBOP (Original Motion Picture Soundtra...,1999-05-01,[],355866.0,35.0,0.489,0.695,4.0,-9.893,0.0,0.0311,0.00015,0.221000,0.0935,0.5740,80.499,4.0
82918,Gustaf Grefberg,A Way Out,A Way Out (Original Game Soundtrack),2020-02-05,[],210859.0,16.0,0.231,0.241,9.0,-15.071,0.0,0.0331,0.78000,0.943000,0.0962,0.0620,56.020,3.0
82919,Foreign Fields,Don’t Give Up - Renewed,Light On Your Face (Renewed) / Don’t Give Up (...,2020-08-06,"['ambient folk', 'chamber pop', 'indie anthem-...",412626.0,41.0,0.625,0.259,0.0,-16.307,0.0,0.0849,0.16800,0.686000,0.1110,0.1430,123.063,4.0
82920,Various Artists,Ramona,Scott Pilgrim vs. the World (Original Motion P...,2010-01-01,[],261506.0,48.0,0.413,0.565,9.0,-7.200,0.0,0.0288,0.03590,0.768000,0.1390,0.3410,123.044,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83961,Various Artists,Überflieger,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],530750.0,25.0,0.649,0.864,10.0,-6.659,0.0,0.0497,0.01720,0.864000,0.0308,0.5380,127.987,4.0
83962,Various Artists,Slumber Party,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],233587.0,37.0,0.533,0.936,1.0,-4.626,1.0,0.0447,0.01430,0.721000,0.0622,0.4210,128.018,4.0
83963,Various Artists,Ragtime Cat (feat. Lilja Bloom),"Electro Swing Fever, Vol. 3",2014-02-17,[],181000.0,22.0,0.736,0.715,5.0,-4.114,0.0,0.0412,0.01200,0.000796,0.2180,0.8510,119.976,4.0
83964,Yoe Mase,Nothing More (deaf Kev Remix),Nothing More (Deaf Kev Remix),2015-11-06,"['chillstep', 'pop edm']",376200.0,37.0,0.383,0.831,1.0,-5.063,1.0,0.0511,0.01230,0.066100,0.1020,0.0912,199.850,4.0


In [210]:
# Pair unmatched feature rows with unmatched scrobbles by track title only.
# Columns present in both frames get `_x` (features) / `_y` (scrobbles) suffixes.
test = failed_features.merge(failed_not_features, on='track')
test

Unnamed: 0,artist_x,track,album_x,release_date_x,genres_x,length_x,popularity_x,danceability_x,energy_x,key_x,...,key_y,loudness_y,mode_y,speechiness_y,acousticness_y,instrumentalness_y,liveness_y,valence_y,tempo_y,time_signature_y
0,Various Artists,In Blue,MOOMINVALLEY (Official Soundtrack),2019-04-19,[],294021.0,53.0,0.578,0.456,0.0,...,,,,,,,,,,
1,Various Artists,In Blue,MOOMINVALLEY (Official Soundtrack),2019-04-19,[],294021.0,53.0,0.578,0.456,0.0,...,,,,,,,,,,
2,Various Artists,In Blue,MOOMINVALLEY (Official Soundtrack),2019-04-19,[],294021.0,53.0,0.578,0.456,0.0,...,,,,,,,,,,
3,Various Artists,See You Space Cowboys Not Final Mountain Root,COWBOY BEBOP (Original Motion Picture Soundtra...,1999-05-01,[],355866.0,35.0,0.489,0.695,4.0,...,,,,,,,,,,
4,Various Artists,See You Space Cowboys Not Final Mountain Root,COWBOY BEBOP (Original Motion Picture Soundtra...,1999-05-01,[],355866.0,35.0,0.489,0.695,4.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2456,Various Artists,Fantasy,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],211000.0,21.0,0.868,0.498,7.0,...,,,,,,,,,,
2457,Various Artists,Überflieger,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],530750.0,25.0,0.649,0.864,10.0,...,,,,,,,,,,
2458,Various Artists,Slumber Party,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],233587.0,37.0,0.533,0.936,1.0,...,,,,,,,,,,
2459,Various Artists,Slumber Party,MrSuicideSheep Presents - Taking You Higher,2015-10-09,[],233587.0,37.0,0.533,0.936,1.0,...,,,,,,,,,,


In [211]:
# Artist names from both sides of the title-only join, as plain lists.
artist_x = test['artist_x'].tolist()
artist_y = test['artist_y'].tolist()

In [212]:
# Count feature rows whose artist is 'Various Artists' (True) versus all others (False).
scrobbles_features['artist'].eq('Various Artists').value_counts()

False    12224
True       261
Name: artist, dtype: int64

In [213]:
# Drop the `_y` columns produced by the merge. Match the suffix only:
# a substring test for 'y' also removes `_x` columns such as `energy_x`,
# `key_x`, `danceability_x` and `popularity_x`.
test_no_y = test.drop(columns=test.columns[test.columns.str.endswith('_y')])

In [214]:
def remove_x(word):
    """Strip a trailing ``_x`` merge suffix from a column name.

    Only the suffix is removed, so names containing other underscores keep
    them (``'release_date_x'`` becomes ``'release_date'``). Names without
    the suffix are returned unchanged.
    """
    suffix = '_x'
    if word.endswith(suffix):
        return word[:-len(suffix)]
    return word

In [215]:
# Map every column name through remove_x, then discard `datetime`.
test_no_y = test_no_y.rename(columns=remove_x).drop(columns='datetime')

In [216]:
# Lookup from track length (`length_x`) to the scrobble-side artist (`artist_y`).
# NOTE(review): rows sharing a length collapse to a single dict key — confirm
# that length is unique enough to serve as a key here.
test1 = test.set_index('length_x')
test_dict = test1['artist_y'].to_dict()

In [217]:
# scrobbles_features['artist'] = scrobbles_features['length'].map(test_dict).fillna(scrobbles_features['artist'])

# scrobbles_features['artist'] = scrobbles_features['artist'].apply(lambda x: artists_dict[x])

# df['Group'] = df['Group'].map(df1.set_index('Group')['Hotel'])

# hotel_dict = df2.set_index('Group').to_dict()
# df1['Group'] = df1['Group'].apply(lambda x: hotel_dict[x])

In [218]:
# Number of matched scrobbles per artist, most frequent first.
merged['artist'].value_counts()

John Mayer                        5635
Radiohead                         5211
Porcupine Tree                    1881
The Paper Kites                   1747
Steven Wilson                     1538
                                  ... 
Vivian Darkbloom                     1
The Golden Islands                   1
David Ross Lawn                      1
Kenneth Pattengale & Joey Ryan       1
Eefje De Visser                      1
Name: artist, Length: 3995, dtype: int64