In [1]:
import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from spotipy.oauth2 import SpotifyOAuth
from sklearn.cluster import KMeans
import os

wd = os.getcwd()
head, tail = wd.rsplit('\\', 1)
head = head.replace('\\', '/')

try:
    from configparser import ConfigParser
except ImportError:
    from ConfigParser import ConfigParser

config = ConfigParser()
config.read(head + '/config.ini')

client_id = config['DEFAULT']['client_id']
client_secret = config['DEFAULT']['client_secret']
redirect_uri = config['DEFAULT']['redirect_uri']
username = config['DEFAULT']['username']

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
scope = 'user-library-read'
token = util.prompt_for_user_token(username, scope, client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri)
sp = spotipy.Spotify(auth=token)

In [3]:
def analyze_playlist(creator, playlist_id):
    
    # Create empty dataframe
    playlist_features_list = ["artist", "album", "track_name", "track_id", 
                             "danceability", "energy", "key", "loudness", "mode", "speechiness",
                             "acousticness","instrumentalness", "liveness", "valence", "tempo", "duration_ms", 
                                "time_signature"]
    playlist_df = pd.DataFrame(columns = playlist_features_list)
    
    # Create empty dict
    playlist_features = {}
    
    # Loop through every track in the playlist, extract features and append the features to the playlist df
    playlist = sp.user_playlist_tracks(creator, playlist_id)["items"]
    for track in playlist:
        # Get metadata
        playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]
        playlist_features["track_id"] = track["track"]["id"]
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[4:]:
            playlist_features[feature] = audio_features[feature]
        
        # Concat the dfs
        track_df = pd.DataFrame(playlist_features, index = [0])
        playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)
        
    return playlist_df

In [4]:
def analyze_playlist_dict(playlist_dict):
    
    # Loop through every playlist in the dict and analyze it
    for i, (key, val) in enumerate(playlist_dict.items()):
        playlist_df = analyze_playlist(*val)
        # Add a playlist column so that we can see which playlist a track belongs too
        playlist_df["playlist"] = key
        # Create or concat df
        if i == 0:
            playlist_dict_df = playlist_df
        else:
            playlist_dict_df = pd.concat([playlist_dict_df, playlist_df], ignore_index = True)
            
    return playlist_dict_df

In [50]:
# Scraping a bunch of large playlists to build up a set up song features
playlist_dict = {
    "Largest_Playlist" : ("gabrieledepaor41", "2sRZldX6n9oaII70OoO3zB"), 
    "The_Largest_Playlist_on_Spotify" : ("Jacob Diehl", "1XhVM7jWPrGLTiNiAy97Za"),
    "Largest_Aesthetic_Rap_Playlist" : ("waiuwufu", "19S6IBSHtRvnXTDilok2MP"),
    "Largest_clean_work_playlist" : ("Wilson White","0hcjRDXanWbhILffggEFme"),
    "Worlds_Largest_Playlist" : ("jaydenrios","7eQUJ3fBA5gnubuiwZN1xw"),
    "Largest_Playlist_garfield" : ("Similarity","5vP7h3L0Wxjk5YQvPAyUvR"),
    "the_largest_hyperpop_playlist" : ("zaderules","76rY59e5BGvV3DlJRVw8gL"),
    "Essential_Indie" : ("Spotify", "37i9dQZF1DX26DKvjp0s9M"), 
    "Early_Alternative" : ("Spotify", "37i9dQZF1DXdTCdwCKzXwo"),
    "Alternative_10s" : ("Spotify", "37i9dQZF1DX873GaRGUmPl"),
    "The_New_Alt" : ("Spotify","37i9dQZF1DX82GYcclJ3Ug"),
    "Rock_This" : ("Spotify","37i9dQZF1DXcF6B6QPhFDv"),
    "Rock_Classics" : ("Spotify","37i9dQZF1DWXRqgorJj26U"),
    "sunnnn" : ("drewdifrancesco", "4Fzuns1RQjyBu06bH22wpd"), 
    "pop_show" : ("drewdifrancesco", "4OviVMMnkPNR98IoDAhyiA"),
    "rainy_day" : ("drewdifrancesco", "3sdy8Oo8ndPgNC5I0nneer"),
    "windows_down" : ("drewdifrancesco","43IKQrkOYRDiBwuT0nQ8zI"),
    "sliding_in_socks" : ("drewdifrancesco","0ZtszsPRTutWeNcurhPN7b"),
    "9_story_parking_lot" : ("drewdifrancesco","5wS9lfGOt0xYLcBqmGrYlx"),
    "hopeless" : ("drewdifrancesco","5G7LY9Cyx2F0659b87Loy0"),
    "Greatest_Hits_all_Genre" : ("Brandon Foster", "7bKpDLk9h1MhZPYDHWkk9I"), 
    "all_genres_no_skip_type_beat" : ("derrickcastro", "5JuT3NiwgGyZWkjzuSMk4t"),
    "top_1000+_songs_of_every_genre_of_all_time" : ("Margaret Kopoulos", "4zwCgb1Igqn4azWiGUEYi7"),
    "Ultimate_Party_Mix_All_Genres" : ("Mike Craig","1C49yxU1XBkoq5yaVDbJwx"),
    "no_skips_on_aux" : ("tommyloftus","6IOwPVY870PVzxrDkzREPT"),
    "Best_Music_of_all_Time" : ("JLLs","2cyihP2rQm4u2NrmnGG2KF"),
    "500_best_albums_of_all_time" : ("AS21", "4KmcBdDIbHeO0alvCfk2TC"), 
    "Greatest_Hits_of_All_Time" : ("DJ-on", "7oyTlM7RLR5LYRhDtcSsit"),
    "best_playlist_ever_to_exist" : ("cody", "3Q3wcJbfeaULXxOo7WhVzk"),
    "Best_Electronic_Music_Of_All_Time_&_Top_EDM" : ("EDM Sauce","0veUfZImTY5RCU2uPcvB8s"),
    "All_time_electronic_music" : ("Roy aragon","0eA0F7BT0UD0iWoIKiLw0A"),
    "All_Bangers_all_the_Time" : ("Torrell Booker","5Sga5y4Mufxn7dvePOMUPM"),
    "All_Time_Rock_Hits" : ("Matt Fuller","0AAYZ2yy7FOK4reAKl6Ont"),
    "Best_Rock_songs_of_All_Time" : ("IndieMassive Hits", "1puQ0hv40TUre24cFillJS"), 
    "The_best_alternative_rock_songs_of_all_time" : ("cujoman31", "7nH2S7ZPvQmA3x8dsG3zdq"),
    "Best_Rap_of_All_Time_Goat" : ("d3rek.t", "4n2ikSftK0aQban4IFPqU6"),
    "Rap_Hits_Of_All_Time" : ("Membrives","3rIeHIUNXKBW7fsKij4SCd"),
    "Best_Old_School_Rap_Playlist" : ("spiller1211","1FnkrdJqaAKfDw87DA1ofl"),
    "Progressive_House_All_Time_Favorites" : ("Mert Tunay", "626UDFY53J9Fma3om0Jkx8"), 
    "teatime" : ("laurengeiser1", "4KGkydDtqD0uAW6HKGwXQI"),
    "Hotdog_Birthday_Party" : ("laurengeiser1", "1C7UZTulFEH1OWyQ7TDNO7"),
    "The_Greatest_Soul_Songs_of_All_Time" : ("Lee Coursey","54te8eYX25KWaq1qOcOgWZ"),
    "Jazz_Classics" : ("Spotify","37i9dQZF1DXbITWG1ZJKYt"),
    "MY_FAVORITE_ALL_TIME_MUSIC_EVER" : ("dazlovestrance","4gydXtIwFVHvrtIdypkfKx"),
    "Bedroom_Pop" : ("Spotify","37i9dQZF1DXcxvFzl58uP7"),
    "Best_Hits_of_All_Time" : ("Pieter De Beucker", "463T5MDn5YxLpA7mcg9nq1"), 
    "Best_Classical_Music_Of_All_Time" : ("Ted", "0bJvpsn0TDZwIDUjz4d75S"),
    "THE_LONGEST_PLAYLIST_ON_SPOTIFY" : ("cecilia","5PnZwfGAngDVXWEHHKRCAP"),
    "Longest_Playlist_ever_+6000" : ("SkylahListens","5oZjXwvrbAJ29Beza3h1bn"),
    "Longest_Playlist_on_spotify" : ("Mr moo","0ntC6utoa1Ea8oo5R5fAVe"),
    "the_longest_playlist_i_have" : ("maddy", "2hgBVAx2Q25G0fgmboZzlr"), 
    "every_song_ever_basically" : ("makenaeel", "7xCOF8dQ4X2HfYsctRYG0u"),
    "Most_popular_music_2010_2020" : ("jhoset", "3VS9luDoG54XlJuBIuOYA9"),
    "60s_70s_80s_Popular_Music" : ("mattalizer", "0LEbhcWqOsiIlQn9HHVN4S"),
    "Popular_2000s_Music" : ("ToxicFoxx", "4QDXK01KcMs51G7vM7gTC8"),
    "_" : ("jordan", "6Xkj5jjuPZ2iTOWwn45t1H"),
    "Amazing_music" : ("ExoticFoxy", "0HzDWXkUg7YiiSIomlSSrH"),
    "BEST_SONGS_OF_ALL_TIME" : ("Best Songs of All Time", "0gqrnk12Q8OExuCeKyBRCq"),
    "best_music_heart" : ("yasmeiri", "4Q5V8SVz9CaoVXxT95i5mx"),
    "BEST_MUSIC_OF_2022" : ("Gustavo Heineken", "1I5IMKDLKyzKoOPMaEWu5w"),
    "Best_taste_in_music" : ("jaydengu", "42sqNnIxQXLBf3IINn2d9q"),
    "blasting_music_in_the_car_with_all_the_windows_down" : ("kgzanni5", "38jPVm0hVi8jlkpis47YyW")
}

In [51]:
%%time
multiple_playlist_df = analyze_playlist_dict(playlist_dict)

Wall time: 11min 51s


In [52]:
multiple_playlist_df = multiple_playlist_df.drop_duplicates()

In [53]:
multiple_playlist_df.size

102222

In [54]:
multiple_playlist_df.to_csv("spotifySongFeatureData.csv",index=False)