In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from client_info import cid, cs as secret
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans


In [3]:
#Authentication - without user
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)


In [4]:
def get_all_tracks(user_id,playlist_id):
    results = sp.user_playlist_tracks(user_id, playlist_id)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [5]:
#>=500 million plays 
#https://open.spotify.com/playlist/2YRe7HRKNRvXdJBp9nXFza?si=\198755290d2c4102
#user
#https://open.spotify.com/user/1221028518?si=2aa6f2f72ab44757

mil_playlist_url = 'https://open.spotify.com/playlist/2YRe7HRKNRvXdJBp9nXFza?si=\198755290d2c4102'
track_list = get_all_tracks('2aa6f2f72ab44757', mil_playlist_url)
#sp.playlist(mil_playlist_url)['tracks']['items']
len(track_list)

677

In [6]:
#create list to create dataframe with. contains all features+song name and artist
track_info = []
bad_info = ['type', 'id', 'uri', 'track_href', 'analysis_url']
total_genres = set()
for track in track_list:
    track_uri=track['track']['uri']
    usable_track=sp.track(track_uri)
    track_id=usable_track['id']

    track_features = sp.audio_features(track_id)[0]
    track_features['name'] = usable_track['name']
    track_features['artist'] = usable_track['artists'][0]['name']

    artist_id = usable_track['artists'][0]['uri']
    genres = sp.artist(artist_id)['genres']
    track_features['genres'] = genres
    total_genres.update(genres)

    for info in bad_info:
        track_features.pop(info)

    track_info.append(track_features)


In [7]:
track_info

[{'danceability': 0.514,
  'energy': 0.73,
  'key': 1,
  'loudness': -5.934,
  'mode': 1,
  'speechiness': 0.0598,
  'acousticness': 0.00146,
  'instrumentalness': 9.54e-05,
  'liveness': 0.0897,
  'valence': 0.334,
  'tempo': 171.005,
  'duration_ms': 200040,
  'time_signature': 4,
  'name': 'Blinding Lights',
  'artist': 'The Weeknd',
  'genres': ['canadian contemporary r&b', 'canadian pop', 'pop']},
 {'danceability': 0.825,
  'energy': 0.652,
  'key': 1,
  'loudness': -3.183,
  'mode': 0,
  'speechiness': 0.0802,
  'acousticness': 0.581,
  'instrumentalness': 0,
  'liveness': 0.0931,
  'valence': 0.931,
  'tempo': 95.977,
  'duration_ms': 233713,
  'time_signature': 4,
  'name': 'Shape of You',
  'artist': 'Ed Sheeran',
  'genres': ['pop', 'uk pop']},
 {'danceability': 0.824,
  'energy': 0.587,
  'key': 6,
  'loudness': -6.401,
  'mode': 0,
  'speechiness': 0.0937,
  'acousticness': 0.69,
  'instrumentalness': 0.000105,
  'liveness': 0.149,
  'valence': 0.514,
  'tempo': 98.029,
  '

In [15]:
num_track_data = [[track[data] for data in track if 
    (isinstance(track[data], float) 
        | isinstance(track[data], int))] for track in track_info]

num_track_data

[[0.514,
  0.73,
  1,
  -5.934,
  1,
  0.0598,
  0.00146,
  9.54e-05,
  0.0897,
  0.334,
  171.005,
  200040,
  4],
 [0.825,
  0.652,
  1,
  -3.183,
  0,
  0.0802,
  0.581,
  0,
  0.0931,
  0.931,
  95.977,
  233713,
  4],
 [0.824,
  0.587,
  6,
  -6.401,
  0,
  0.0937,
  0.69,
  0.000105,
  0.149,
  0.514,
  98.029,
  209438,
  4],
 [0.501,
  0.405,
  1,
  -5.679,
  1,
  0.0319,
  0.751,
  0,
  0.105,
  0.446,
  109.891,
  182161,
  4],
 [0.585,
  0.52,
  5,
  -6.136,
  0,
  0.0712,
  0.124,
  7.01e-05,
  0.131,
  0.129,
  159.801,
  218147,
  4],
 [0.76,
  0.479,
  2,
  -5.574,
  1,
  0.0466,
  0.556,
  0,
  0.0703,
  0.913,
  89.911,
  158040,
  4],
 [0.792,
  0.625,
  1,
  -5.609,
  1,
  0.0536,
  0.00776,
  0.00188,
  0.329,
  0.37,
  103.967,
  173987,
  4],
 [0.738,
  0.541,
  8,
  -5.597,
  1,
  0.0303,
  0.415,
  0,
  0.11,
  0.657,
  94.949,
  245507,
  4],
 [0.772,
  0.78,
  10,
  -4.288,
  0,
  0.108,
  0.0512,
  0,
  0.119,
  0.719,
  124.988,
  204347,
  4],
 [0.591,
  0.

In [17]:
num_features = len(num_track_data[0])

In [8]:
reduced_genres = {}
for genre in total_genres:
    if ' ' not in genre or genre == "hip hop":
        reduced_genres[genre] = []

for genre in total_genres:
    base_genre = genre
    if ' ' in genre:
        for key in reduced_genres.keys():
            if key in genre:
                base_genre = key
    if base_genre not in reduced_genres:
        reduced_genres[base_genre] = []
    else:
        reduced_genres[base_genre].append(genre)

In [24]:
print(len(reduced_genres))
reduced_genres

89


{'neo-synthpop': ['neo-synthpop'],
 'house': ['house',
  'deep house',
  'tropical house',
  'progressive house',
  'deep euro house',
  'slap house',
  'filter house'],
 'funk': ['funk', 'g funk', 'jazz funk', 'uk funky'],
 'country': ['country', 'country road', 'contemporary country'],
 'protopunk': ['protopunk'],
 'disco': ['disco'],
 'brostep': ['brostep'],
 'k-pop': ['k-pop'],
 'hollywood': ['hollywood'],
 'moombahton': ['moombahton'],
 'pluggnb': ['pluggnb'],
 'punk': ['skate punk', 'punk', 'punk blues'],
 'rock': ['dance rock',
  'swamp rock',
  'alternative rock',
  'blues rock',
  'hard rock',
  'yacht rock',
  'symphonic rock',
  'soft rock',
  'rock',
  'album rock',
  'heartland rock',
  'garage rock',
  'art rock',
  'modern rock',
  'french rock',
  'indie rock italiano',
  'piano rock',
  'irish rock',
  'glam rock',
  'detroit rock',
  'country rock',
  'australian rock',
  'roots rock',
  'classic rock',
  'southern rock',
  'canadian rock',
  'modern alternative rock'

In [10]:
n = len(reduced_genres)
n

89

In [21]:
#kmeans
km = KMeans(n_clusters=n, init='random', n_init='auto', 
    tol=1e-04, random_state=0)
y_km = km.fit_predict(num_track_data)
y_km

array([ 9, 16, 51, 41, 11, 35, 58, 72, 57, 64, 87, 20,  7, 29, 23, 22, 85,
       74, 52,  9,  9, 58, 36,  4, 72, 81, 16,  5, 80, 30, 79, 85, 53, 70,
       30, 68, 11, 45, 52, 51, 18, 49, 24, 65, 88, 55,  5, 12, 30, 17, 36,
       70, 34, 24, 24, 18, 74, 30, 53, 67, 37, 58, 21,  6, 79, 25, 37, 87,
       17, 42, 59, 33, 24,  2, 32, 64, 26, 64, 62, 16, 27,  9, 14,  3, 36,
       50, 29,  9, 39, 50, 54, 38, 25,  7, 30, 83, 16, 38, 16, 41, 69,  7,
        3,  3, 77, 75, 11, 51,  6, 65, 22, 86, 35, 55, 46, 18, 84, 18, 76,
       34, 53, 71, 88, 80, 35, 37, 61, 79, 13, 71, 78, 39,  3, 66, 74, 85,
       11, 58,  6, 34, 60,  7, 72, 76, 79, 85, 87, 54,  4, 88, 65, 39, 88,
       46, 21,  0, 66, 14, 11, 66, 18, 45, 25, 45, 57, 65, 78, 24, 72, 84,
        9, 53, 50, 25, 10, 31,  2, 39, 72,  1, 73, 66,  0, 19, 65, 60, 22,
       81, 37,  3, 65, 68, 85, 74, 24, 38, 30,  0, 16, 18, 83, 34, 54, 72,
       25, 19, 46, 74, 48, 23, 17, 29, 79, 65, 45, 33, 80, 15,  2, 59, 56,
       27,  8, 77, 38, 71

In [25]:
print("Number of tracks: " + str(len(y_km)) + "\n" +
    "Number of genres: " + str(len(set(y_km))))

Number of tracks: 677
Number of genres: 89


In [27]:
for i in range(len(track_info)):
    track_info[i]["Category"] = y_km[i]

track_info

[{'danceability': 0.514,
  'energy': 0.73,
  'key': 1,
  'loudness': -5.934,
  'mode': 1,
  'speechiness': 0.0598,
  'acousticness': 0.00146,
  'instrumentalness': 9.54e-05,
  'liveness': 0.0897,
  'valence': 0.334,
  'tempo': 171.005,
  'duration_ms': 200040,
  'time_signature': 4,
  'name': 'Blinding Lights',
  'artist': 'The Weeknd',
  'genres': ['canadian contemporary r&b', 'canadian pop', 'pop'],
  'Category': 9},
 {'danceability': 0.825,
  'energy': 0.652,
  'key': 1,
  'loudness': -3.183,
  'mode': 0,
  'speechiness': 0.0802,
  'acousticness': 0.581,
  'instrumentalness': 0,
  'liveness': 0.0931,
  'valence': 0.931,
  'tempo': 95.977,
  'duration_ms': 233713,
  'time_signature': 4,
  'name': 'Shape of You',
  'artist': 'Ed Sheeran',
  'genres': ['pop', 'uk pop'],
  'Category': 16},
 {'danceability': 0.824,
  'energy': 0.587,
  'key': 6,
  'loudness': -6.401,
  'mode': 0,
  'speechiness': 0.0937,
  'acousticness': 0.69,
  'instrumentalness': 0.000105,
  'liveness': 0.149,
  'vale

In [28]:
tracks_df = pd.DataFrame.from_dict(track_info)
tracks_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,name,artist,genres,Category
0,0.514,0.730,1,-5.934,1,0.0598,0.00146,0.000095,0.0897,0.334,171.005,200040,4,Blinding Lights,The Weeknd,"[canadian contemporary r&b, canadian pop, pop]",9
1,0.825,0.652,1,-3.183,0,0.0802,0.58100,0.000000,0.0931,0.931,95.977,233713,4,Shape of You,Ed Sheeran,"[pop, uk pop]",16
2,0.824,0.587,6,-6.401,0,0.0937,0.69000,0.000105,0.1490,0.514,98.029,209438,4,Dance Monkey,Tones And I,"[australian pop, pop]",51
3,0.501,0.405,1,-5.679,1,0.0319,0.75100,0.000000,0.1050,0.446,109.891,182161,4,Someone You Loved,Lewis Capaldi,"[pop, uk pop]",41
4,0.585,0.520,5,-6.136,0,0.0712,0.12400,0.000070,0.1310,0.129,159.801,218147,4,rockstar (feat. 21 Savage),Post Malone,"[dfw rap, melodic rap, rap]",11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
672,0.440,0.317,8,-9.258,1,0.0531,0.89100,0.000005,0.1410,0.268,169.914,233456,3,Glimpse of Us,Joji,[viral pop],16
673,0.729,0.650,5,-3.539,1,0.0313,0.07020,0.000067,0.0829,0.347,110.020,209160,4,Habits (Stay High),Tove Lo,"[dance pop, electropop, metropopolis, pop, swe...",51
674,0.504,0.904,6,-6.863,1,0.1790,0.35600,0.121000,0.1400,0.772,176.808,183440,4,I'm Still Standing,Elton John,"[glam rock, mellow gold, piano rock]",28
675,0.554,0.315,3,-11.028,1,0.0298,0.82800,0.000006,0.1010,0.325,128.157,241787,4,Your Song,Elton John,"[glam rock, mellow gold, piano rock]",30


In [30]:
tracks_by_category = tracks_df.sort_values(by='Category')
tracks_by_category


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,name,artist,genres,Category
288,0.636,0.815,5,-5.098,0,0.0581,0.01850,0.000000,0.1630,0.636,123.063,219043,4,Sugar (feat. Francesco Yates),Robin Schulz,"[dance pop, deep euro house, deep house, edm, ...",0
321,0.788,0.859,1,-5.542,1,0.3150,0.12700,0.000000,0.3480,0.769,140.027,219333,4,Ni**as In Paris,JAY-Z,"[east coast hip hop, hip hop, rap]",0
515,0.500,0.824,4,-5.846,0,0.0388,0.00016,0.001610,0.0916,0.539,120.125,218920,4,Come As You Are,Nirvana,"[grunge, permanent wave, rock]",0
419,0.647,0.785,7,-5.414,1,0.1650,0.05610,0.000000,0.1480,0.943,160.015,219200,4,Shake It Off,Taylor Swift,[pop],0
182,0.736,0.449,1,-11.462,0,0.4250,0.33000,0.000162,0.0898,0.326,80.126,219080,4,Location,Khalid,"[pop, pop r&b]",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
662,0.492,0.810,7,-5.315,1,0.0847,0.04160,0.003790,0.1170,0.245,149.954,251840,4,Dog Days Are Over,Florence + The Machine,"[baroque pop, pop, uk alternative pop]",88
44,0.479,0.545,7,-7.346,1,0.0688,0.36500,0.000000,0.0963,0.244,74.897,252733,4,Let Her Go,Passenger,"[folk-pop, neo mellow]",88
632,0.520,0.852,0,-5.866,1,0.0543,0.00237,0.000058,0.0733,0.234,140.267,253587,4,505,Arctic Monkeys,"[garage rock, modern rock, permanent wave, roc...",88
122,0.816,0.726,5,-3.998,0,0.1290,0.09900,0.000000,0.3720,0.650,136.048,253390,4,I Like It,Cardi B,"[dance pop, pop, rap]",88
