## Imports

In [11]:
import os
import pandas as pd
import spotipy
import json
from collections import Counter
from spotipy.oauth2 import SpotifyClientCredentials
import pickle
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import lil_matrix

## Spotify Credentials
Set the `SPOTIPY_CLIENT_ID` and `SPOTIPY_CLIENT_SECRET` environment variables to your Spotify API credentials before running the next cell.

In [None]:
# Read the Spotify API credentials from the environment so that no secret is
# stored in the notebook itself. Both values are None when the variable is unset.
cid = os.getenv("SPOTIPY_CLIENT_ID")
secret = os.getenv("SPOTIPY_CLIENT_SECRET")

# Build the credentials manager and hand it to the API client used by the
# cells below.
client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

## Collect information from Spotify Million Playlist Dataset

Download dataset from: https://www.aicrowd.com/challenges/spotify-million-playlist-dataset-challenge

In [None]:
# Three dicts keyed by track uri. New keys are added to all of them in the same
# order (first appearance in the dataset), so their key order always matches.
tracks_info = {}  # key = track uri, value = [track name, artist uri, artist name, album uri, album name]
tracks_neighbors = {}  # key = track uri, value = Counter of track uris sharing a playlist with it
track_playlist_count = {}  # key = track uri, value = number of playlists the track appears in

# Explicit UTF-8 so non-ASCII titles/artists decode identically on every platform
# instead of depending on the locale's default encoding.
with open("data/mpd.slice.0-999.json", "r", encoding="utf-8") as read_file:
    data = json.load(read_file)

for playlist in data.get('playlists'):
    playlist_tracks = playlist.get('tracks')

    for track in playlist_tracks:
        track_uri = track.get("track_uri")
        if track_uri not in tracks_info:  # only store track info once
            tracks_info[track_uri] = [
                track.get('track_name'),
                track.get("artist_uri"),
                track.get("artist_name"),
                track.get("album_uri"),
                track.get("album_name"),
            ]

    # Unique track uris of this playlist, in order of first appearance.
    # dict.fromkeys de-duplicates while keeping order, which keeps the key order
    # of the dicts aligned and makes the result independent of string hashing.
    playlist_track_uris = list(
        dict.fromkeys(track.get("track_uri") for track in playlist_tracks)
    )

    # Iterate over the de-duplicated uris: a track that is repeated inside one
    # playlist must count that playlist (and its neighbors) exactly once,
    # otherwise neighbors[a][b] and neighbors[b][a] would disagree.
    for track_uri in playlist_track_uris:
        track_playlist_count[track_uri] = track_playlist_count.get(track_uri, 0) + 1
        if track_uri not in tracks_neighbors:
            tracks_neighbors[track_uri] = Counter()
        # Note: a track is counted as its own neighbor too, since its uri is
        # part of playlist_track_uris.
        tracks_neighbors[track_uri].update(playlist_track_uris)

## Get track features from Spotify api
Spotify limits audio-feature requests to 100 tracks per query, so the track URIs are sent in batches of 100.

In [None]:
# Collect one audio-features dict per track by querying the API in batches.
track_features = []
track_uris = list(tracks_info.keys())  # materialize once instead of once per batch
total_track_count = len(track_uris)
for i in range(0, total_track_count, 100):
    # Slicing past the end is safe, so the last (shorter) batch needs no special case.
    batch_features = spotify.audio_features(track_uris[i:i + 100])
    # The Spotify Web API documents a null entry for every track it has no audio
    # features for; drop those so only real feature dicts are kept.
    track_features.extend(
        features for features in batch_features if features is not None
    )

## Create dataframe

In [None]:
# One row per track; the dict key (track uri) becomes the 'uri' column.
df = (
    pd.DataFrame.from_dict(
        data=tracks_info,
        orient='index',
        columns=['title', 'artist uri', 'artist', 'album uri', 'album title'],
    )
    .reset_index()
    .rename(columns={'index': 'uri'})
)

# Look the per-track values up by uri instead of assigning dict.values()
# positionally: the result no longer depends on the three dicts sharing the
# same insertion order, and a missing uri fails loudly with a KeyError.
df['total playlist appearences'] = [track_playlist_count[uri] for uri in df['uri']]
df['neighboring tracks'] = [tracks_neighbors[uri] for uri in df['uri']]

# Skip None placeholders (tracks without audio features) before building the
# features frame; the inner merge on 'uri' then drops tracks without features.
features_df = pd.DataFrame(
    [features for features in track_features if features is not None]
)
df = pd.merge(df, features_df, on="uri")

## Pickle Helper Functions

In [None]:
def pickle_object(output_file_name, object):
    """Serialize ``object`` to ``output_file_name`` with pickle.

    Any existing file at that path is overwritten, so the file always holds
    exactly one pickled object: the one passed in the most recent call.

    Args:
        output_file_name: Path of the pickle file to write.
        object: Any picklable Python object.
    """
    # 'wb' rather than 'ab': appending would stack a new pickle behind the old
    # one, and a later pickle.load() would keep returning the oldest object.
    # The with-block closes the file even if pickle.dump raises.
    with open(output_file_name, 'wb') as dbfile:
        pickle.dump(object, dbfile)
    
def unpickle_object(pickle_file_name):
    """Load and return the object stored in the pickle file ``pickle_file_name``.

    Args:
        pickle_file_name: Path of the pickle file to read.

    Returns:
        The first object stored in the file.

    Raises:
        FileNotFoundError: If ``pickle_file_name`` does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code; only load files that
    # were created by this notebook or another trusted source.
    with open(pickle_file_name, "rb") as input_file:
        unpickled_object = pickle.load(input_file)
    return unpickled_object

pickle the dataframe

In [None]:
# Save df to 'track_dataframe.pickle' (path is relative to the working directory).
pickle_object('track_dataframe.pickle',df)

load the dataframe

In [None]:
# Replace the in-memory df with the object stored in 'track_dataframe.pickle'.
df = unpickle_object('track_dataframe.pickle')

In [None]:
# Render the dataframe as rich notebook output.
display(df)

Unnamed: 0,uri,title,artist uri,artist,album uri,album title,total playlist appearences,neighboring tracks,danceability,energy,...,instrumentalness,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature
0,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,Lose Control (feat. Ciara & Fat Man Scoop),spotify:artist:2wIVse2owClT7go1WT98tk,Missy Elliott,spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,The Cookbook,6,"{'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 4, 's...",0.904,0.813,...,0.006970,0.0471,0.810,125.461,audio_features,0UaMYEvWZi0ZqiDOoHU3YI,https://api.spotify.com/v1/tracks/0UaMYEvWZi0Z...,https://api.spotify.com/v1/audio-analysis/0UaM...,226864,4
1,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,Toxic,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Britney Spears,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,In The Zone,13,"{'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 5, 's...",0.774,0.838,...,0.025000,0.2420,0.924,143.040,audio_features,6I9VzXrHxO9rA9A5euc8Ak,https://api.spotify.com/v1/tracks/6I9VzXrHxO9r...,https://api.spotify.com/v1/audio-analysis/6I9V...,198800,4
2,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,Crazy In Love,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Beyoncé,spotify:album:25hVFAxTlDvXbx2X2QkUkE,Dangerously In Love (Alben für die Ewigkeit),27,"{'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 11, '...",0.664,0.758,...,0.000000,0.0598,0.701,99.259,audio_features,0WqIKmW4BTrj3eJFmnCKMv,https://api.spotify.com/v1/tracks/0WqIKmW4BTrj...,https://api.spotify.com/v1/audio-analysis/0WqI...,235933,4
3,spotify:track:1AWQoqb9bSvzTjaLralEkT,Rock Your Body,spotify:artist:31TPClRtHm23RisEBtV3X7,Justin Timberlake,spotify:album:6QPkyl04rXwTGlGlcYaRoW,Justified,9,"{'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 5, 's...",0.892,0.714,...,0.000234,0.0521,0.817,100.972,audio_features,1AWQoqb9bSvzTjaLralEkT,https://api.spotify.com/v1/tracks/1AWQoqb9bSvz...,https://api.spotify.com/v1/audio-analysis/1AWQ...,267267,4
4,spotify:track:1lzr43nnXAijIGYnCT8M8H,It Wasn't Me,spotify:artist:5EvFsr3kj42KNv97ZEnqij,Shaggy,spotify:album:6NmFmPX56pcLBOFMhIiKvF,Hot Shot,25,"{'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 7, 's...",0.853,0.606,...,0.000000,0.3130,0.654,94.759,audio_features,1lzr43nnXAijIGYnCT8M8H,https://api.spotify.com/v1/tracks/1lzr43nnXAij...,https://api.spotify.com/v1/audio-analysis/1lzr...,227600,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34438,spotify:track:3uCHI1gfOUL5j5swEh0TcH,I Don't Know,spotify:artist:5HCypjplgh5uQezvBpOfXN,Jon D,spotify:album:2KEQtuVl1cYsTYtVRUrNVi,Roots,1,"{'spotify:track:4gFxywaJejXWxo0NjlWzgg': 1, 's...",0.669,0.228,...,0.065000,0.0944,0.402,83.024,audio_features,3uCHI1gfOUL5j5swEh0TcH,https://api.spotify.com/v1/tracks/3uCHI1gfOUL5...,https://api.spotify.com/v1/audio-analysis/3uCH...,189184,4
34439,spotify:track:0P1oO2gREMYUCoOkzYAyFu,The Answer,spotify:artist:0sHN89qak07mnug3LVVjzP,Big Words,spotify:album:5jrsRHRAmetu5e7RRBoxj7,"Hollywood, a Beautiful Coincidence",1,"{'spotify:track:4gFxywaJejXWxo0NjlWzgg': 1, 's...",0.493,0.727,...,0.000000,0.1290,0.289,73.259,audio_features,0P1oO2gREMYUCoOkzYAyFu,https://api.spotify.com/v1/tracks/0P1oO2gREMYU...,https://api.spotify.com/v1/audio-analysis/0P1o...,263680,4
34440,spotify:track:2oM4BuruDnEvk59IvIXCwn,25.22,spotify:artist:6Yv6OBXD6ZQakEljaGaDAk,Allan Rayman,spotify:album:3CbNgBzI7r9o0F6VjH9sTY,Roadhouse 01,1,"{'spotify:track:4gFxywaJejXWxo0NjlWzgg': 1, 's...",0.702,0.524,...,0.055300,0.2980,0.265,140.089,audio_features,2oM4BuruDnEvk59IvIXCwn,https://api.spotify.com/v1/tracks/2oM4BuruDnEv...,https://api.spotify.com/v1/audio-analysis/2oM4...,189213,4
34441,spotify:track:4Ri5TTUgjM96tbQZd5Ua7V,Good Feeling,spotify:artist:77bNdkKYBBmc30CisCA6tE,Jon Jason,spotify:album:2dZ7oVNQBeLlpoUYfbEsJP,Good Feeling,1,"{'spotify:track:4gFxywaJejXWxo0NjlWzgg': 1, 's...",0.509,0.286,...,0.000012,0.1310,0.259,121.633,audio_features,4Ri5TTUgjM96tbQZd5Ua7V,https://api.spotify.com/v1/tracks/4Ri5TTUgjM96...,https://api.spotify.com/v1/audio-analysis/4Ri5...,194720,4
