In [17]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from client_info import SPOTIFY_CID as cid, SPOTIFY_CS as secret
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans


In [18]:
#Authentication - without user
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)


In [19]:
def get_all_tracks(user_id,playlist_id):
    results = sp.user_playlist_tracks(user_id, playlist_id)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [20]:
#>=500 million plays 
#https://open.spotify.com/playlist/2YRe7HRKNRvXdJBp9nXFza?si=\198755290d2c4102
#user
#https://open.spotify.com/user/1221028518?si=2aa6f2f72ab44757

mil_playlist_url = 'https://open.spotify.com/playlist/2YRe7HRKNRvXdJBp9nXFza?si=\198755290d2c4102'
track_list = get_all_tracks('2aa6f2f72ab44757', mil_playlist_url)
#sp.playlist(mil_playlist_url)['tracks']['items']
len(track_list)

682

In [28]:
#create list to create dataframe with. contains all features+song name and artist
track_info = []
bad_info = ['type', 'id', 'uri', 'track_href', 'analysis_url']
total_genres = set()
for track in track_list:
    track_uri=track['track']['uri']
    usable_track=sp.track(track_uri)
    track_id=usable_track['id']

    track_features = sp.audio_features(track_id)[0]
    track_features['name'] = usable_track['name']
    track_features['artist(s)'] = [usable_track['artists'][i]['name'] 
        for i in range(len(usable_track['artists']))]

    artist_ids = [usable_track['artists'][i]['uri'] 
        for i in range(len(usable_track['artists']))]
    genres = [sp.artist(artist)['genres'] for artist in artist_ids]
    track_features['genres'] = genres
    [total_genres.update(genre) for genre in genres]

    for info in bad_info:
        track_features.pop(info)

    track_info.append(track_features)


In [29]:
track_info

[{'danceability': 0.514,
  'energy': 0.73,
  'key': 1,
  'loudness': -5.934,
  'mode': 1,
  'speechiness': 0.0598,
  'acousticness': 0.00146,
  'instrumentalness': 9.54e-05,
  'liveness': 0.0897,
  'valence': 0.334,
  'tempo': 171.005,
  'duration_ms': 200040,
  'time_signature': 4,
  'name': 'Blinding Lights',
  'artist(s)': ['The Weeknd'],
  'genres': [['canadian contemporary r&b', 'canadian pop', 'pop']]},
 {'danceability': 0.825,
  'energy': 0.652,
  'key': 1,
  'loudness': -3.183,
  'mode': 0,
  'speechiness': 0.0802,
  'acousticness': 0.581,
  'instrumentalness': 0,
  'liveness': 0.0931,
  'valence': 0.931,
  'tempo': 95.977,
  'duration_ms': 233713,
  'time_signature': 4,
  'name': 'Shape of You',
  'artist(s)': ['Ed Sheeran'],
  'genres': [['pop', 'uk pop']]},
 {'danceability': 0.824,
  'energy': 0.587,
  'key': 6,
  'loudness': -6.401,
  'mode': 0,
  'speechiness': 0.0937,
  'acousticness': 0.69,
  'instrumentalness': 0.000105,
  'liveness': 0.149,
  'valence': 0.514,
  'tempo

In [30]:
num_track_data = [[track[data] for data in track if 
    (isinstance(track[data], float) 
        | isinstance(track[data], int))] for track in track_info]

num_track_data

[[0.514,
  0.73,
  1,
  -5.934,
  1,
  0.0598,
  0.00146,
  9.54e-05,
  0.0897,
  0.334,
  171.005,
  200040,
  4],
 [0.825,
  0.652,
  1,
  -3.183,
  0,
  0.0802,
  0.581,
  0,
  0.0931,
  0.931,
  95.977,
  233713,
  4],
 [0.824,
  0.587,
  6,
  -6.401,
  0,
  0.0937,
  0.69,
  0.000105,
  0.149,
  0.514,
  98.029,
  209438,
  4],
 [0.501,
  0.405,
  1,
  -5.679,
  1,
  0.0319,
  0.751,
  0,
  0.105,
  0.446,
  109.891,
  182161,
  4],
 [0.585,
  0.52,
  5,
  -6.136,
  0,
  0.0712,
  0.124,
  7.01e-05,
  0.131,
  0.129,
  159.801,
  218147,
  4],
 [0.76,
  0.479,
  2,
  -5.574,
  1,
  0.0466,
  0.556,
  0,
  0.0703,
  0.913,
  89.911,
  158040,
  4],
 [0.792,
  0.625,
  1,
  -5.609,
  1,
  0.0536,
  0.00776,
  0.00188,
  0.329,
  0.37,
  103.967,
  173987,
  4],
 [0.738,
  0.541,
  8,
  -5.597,
  1,
  0.0303,
  0.415,
  0,
  0.11,
  0.657,
  94.949,
  245507,
  4],
 [0.772,
  0.78,
  10,
  -4.288,
  0,
  0.108,
  0.0512,
  0,
  0.119,
  0.719,
  124.988,
  204347,
  4],
 [0.591,
  0.

In [31]:
num_features = len(num_track_data[0])

In [32]:
reduced_genres = {}
for genre in total_genres:
    if ' ' not in genre or genre == "hip hop":
        reduced_genres[genre] = []

for genre in total_genres:
    base_genre = genre
    if ' ' in genre:
        for key in reduced_genres.keys():
            if key in genre:
                base_genre = key
    if base_genre not in reduced_genres:
        reduced_genres[base_genre] = []
    else:
        reduced_genres[base_genre].append(genre)

In [33]:
print(len(reduced_genres))
reduced_genres

110


{'eurodance': ['eurodance'],
 'neo-psychedelic': ['neo-psychedelic'],
 'r&b': ['r&b',
  'indie r&b',
  'alternative r&b',
  'canadian contemporary r&b',
  'chill r&b',
  'contemporary r&b',
  'uk contemporary r&b'],
 'afropop': ['afropop'],
 'edm': ['edm', 'dutch edm'],
 'madchester': ['madchester'],
 'metropopolis': ['metropopolis'],
 'shoegaze': ['shoegaze', 'french shoegaze'],
 'soul': ['soul', 'british soul', 'northern soul', 'neo soul', 'classic soul'],
 'electro': ['electro', 'canadian electronic'],
 'britpop': ['britpop'],
 'hip hop': ['minnesota hip hop',
  'canadian old school hip hop',
  'seattle hip hop',
  'mississippi hip hop',
  'atl hip hop',
  'hip hop',
  'kentucky hip hop',
  'oakland hip hop',
  'dutch hip hop',
  'portland hip hop',
  'deep underground hip hop',
  'nigerian hip hop',
  'memphis hip hop',
  'north carolina hip hop',
  'virginia hip hop',
  'tennessee hip hop',
  'latin hip hop',
  'argentine hip hop',
  'underground hip hop',
  'canadian hip hop',
  

In [34]:
n = len(reduced_genres)
n

110

In [35]:
#kmeans
km = KMeans(n_clusters=n, init='random', n_init='auto', 
    tol=1e-04, random_state=0)
y_km = km.fit_predict(num_track_data)
y_km

array([ 64,  80,  47,  83, 101,  40,  95,  91,   8,  38,  27,  63,  16,
        20,  52,  53,  59,  76,  68, 106,  64,  95,   5,  85,  60,  23,
        80,   9,  54,  90,   6,  59,  87, 100,  28,  15, 101,  29,  68,
        47,  71, 107,  82,  35,  89,  79,   9,   8,  90,  64,   5,  51,
       102,  82,  26,  71,  75,  90,  14,  37,  44,  11,  29,  47,   6,
        99,  44,  14,  64,  37,   4, 102,  34,  30,  29,  61,   8,  38,
       103,  80,  61, 106,  17,  49,   5, 102,  20, 106,  10, 102,  77,
        36,   3, 105,  28,  84,  72,  36,  72,  83,  29, 105,  94,  94,
        87,  66, 101,  47,  47,  98,  70,   7,  40,  79,  19,  71,  37,
        71,  86,   8,  87,  23,  32,  54,  40,  44,   4,   6,   2,  56,
        38,  10,  49,  93,  75,  16, 101,  42,  47,   8,  58,  16,  60,
        86,  45,  16,  27,  18,  85,  89,  98,  10,  32,  19,  29,  48,
        93,  39, 101,  93,  71,  29,  99,  29,   8,  98,  38,  26,  91,
        37, 106,  87, 102,  99,  20,  88,  30,  77,  91,   0,  2

In [36]:
print("Number of tracks: " + str(len(y_km)) + "\n" +
    "Number of genres: " + str(len(set(y_km))))

Number of tracks: 682
Number of genres: 110


In [37]:
for i in range(len(track_info)):
    track_info[i]["Category"] = y_km[i]

track_info

[{'danceability': 0.514,
  'energy': 0.73,
  'key': 1,
  'loudness': -5.934,
  'mode': 1,
  'speechiness': 0.0598,
  'acousticness': 0.00146,
  'instrumentalness': 9.54e-05,
  'liveness': 0.0897,
  'valence': 0.334,
  'tempo': 171.005,
  'duration_ms': 200040,
  'time_signature': 4,
  'name': 'Blinding Lights',
  'artist(s)': ['The Weeknd'],
  'genres': [['canadian contemporary r&b', 'canadian pop', 'pop']],
  'Category': 64},
 {'danceability': 0.825,
  'energy': 0.652,
  'key': 1,
  'loudness': -3.183,
  'mode': 0,
  'speechiness': 0.0802,
  'acousticness': 0.581,
  'instrumentalness': 0,
  'liveness': 0.0931,
  'valence': 0.931,
  'tempo': 95.977,
  'duration_ms': 233713,
  'time_signature': 4,
  'name': 'Shape of You',
  'artist(s)': ['Ed Sheeran'],
  'genres': [['pop', 'uk pop']],
  'Category': 80},
 {'danceability': 0.824,
  'energy': 0.587,
  'key': 6,
  'loudness': -6.401,
  'mode': 0,
  'speechiness': 0.0937,
  'acousticness': 0.69,
  'instrumentalness': 0.000105,
  'liveness':

In [38]:
tracks_df = pd.DataFrame.from_dict(track_info)
tracks_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,name,artist(s),genres,Category
0,0.514,0.730,1,-5.934,1,0.0598,0.00146,0.000095,0.0897,0.334,171.005,200040,4,Blinding Lights,[The Weeknd],"[[canadian contemporary r&b, canadian pop, pop]]",64
1,0.825,0.652,1,-3.183,0,0.0802,0.58100,0.000000,0.0931,0.931,95.977,233713,4,Shape of You,[Ed Sheeran],"[[pop, uk pop]]",80
2,0.824,0.587,6,-6.401,0,0.0937,0.69000,0.000105,0.1490,0.514,98.029,209438,4,Dance Monkey,[Tones And I],"[[australian pop, pop]]",47
3,0.501,0.405,1,-5.679,1,0.0319,0.75100,0.000000,0.1050,0.446,109.891,182161,4,Someone You Loved,[Lewis Capaldi],"[[pop, uk pop]]",83
4,0.585,0.520,5,-6.136,0,0.0712,0.12400,0.000070,0.1310,0.129,159.801,218147,4,rockstar (feat. 21 Savage),"[Post Malone, 21 Savage]","[[dfw rap, melodic rap, rap], [atl hip hop, rap]]",101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
677,0.414,0.428,0,-11.097,1,0.0278,0.38200,0.000243,0.1480,0.282,145.075,377093,4,Tiny Dancer,[Elton John],"[[glam rock, mellow gold, piano rock]]",55
678,0.749,0.491,10,-9.655,1,0.0403,0.02000,0.008990,0.1590,0.536,119.528,292613,4,Gimme! Gimme! Gimme! (A Man After Midnight),[ABBA],"[[europop, swedish pop]]",77
679,0.561,0.965,7,-3.673,0,0.0343,0.00383,0.000007,0.3710,0.304,128.040,175238,4,I'm Good (Blue),"[David Guetta, Bebe Rexha]","[[big room, dance pop, edm, pop, pop dance], [...",34
680,0.552,0.608,1,-9.786,0,0.0345,0.35200,0.000000,0.0533,0.637,112.367,162373,4,California Dreamin' - Single Version,[The Mamas & The Papas],"[[bubblegum pop, classic rock, folk rock, mell...",58


In [39]:
tracks_by_category = tracks_df.sort_values(by='Category')
tracks_by_category


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,name,artist(s),genres,Category
545,0.597,0.450,8,-6.658,1,0.0418,0.22300,0.000000,0.3820,0.537,178.765,228482,3,Stuck with U (with Justin Bieber),"[Ariana Grande, Justin Bieber]","[[dance pop, pop], [canadian pop, pop]]",0
258,0.461,0.593,1,-4.954,1,0.0356,0.17000,0.000000,0.3070,0.175,134.170,228373,3,Call Out My Name,[The Weeknd],"[[canadian contemporary r&b, canadian pop, pop]]",0
582,0.693,0.678,8,-7.159,1,0.0324,0.01510,0.000006,0.1010,0.404,103.952,228267,4,Higher Love,"[Kygo, Whitney Houston]","[[edm, pop, pop dance, tropical house], [dance...",0
583,0.512,0.662,3,-6.797,1,0.0439,0.02750,0.000000,0.1180,0.472,180.114,228347,4,Fireflies,[Owl City],"[[indietronica, pop rock]]",0
390,0.393,0.858,4,-2.868,0,0.0729,0.00359,0.000000,0.1020,0.560,176.042,228360,4,Centuries,[Fall Out Boy],"[[emo, modern rock]]",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,0.783,0.584,7,-6.572,1,0.0409,0.01090,0.000003,0.1080,0.632,120.031,193467,4,Call Me Maybe,[Carly Rae Jepsen],"[[canadian pop, dance pop, electropop, pop, po...",108
531,0.691,0.823,2,-4.197,1,0.0623,0.48700,0.000000,0.1340,0.870,123.935,191467,4,If I Can't Have You,[Shawn Mendes],"[[canadian pop, dance pop, pop, viral pop]]",109
403,0.688,0.702,7,-4.792,0,0.0499,0.02150,0.000000,0.1280,0.740,94.006,192191,4,My House,[Flo Rida],"[[dance pop, edm, miami hip hop, pop, pop rap]]",109
237,0.671,0.314,5,-8.091,0,0.2440,0.11000,0.000000,0.0825,0.296,110.898,191600,4,Needed Me,[Rihanna],"[[barbadian pop, dance pop, pop, urban contemp...",109
