In [15]:
import joblib
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import os
import matplotlib.pyplot as plt


# Resolve the project root as the parent of the current working directory.
# os.path.split understands the platform's path separators, so this no longer
# raises ValueError when the path contains no backslash (e.g. macOS/Linux).
wd = os.getcwd()
head, tail = os.path.split(wd)
head = head.replace('\\', '/')

try:
    from configparser import ConfigParser
except ImportError:
    from ConfigParser import ConfigParser

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did read, so check that list to fail with a clear message
# instead of an opaque KeyError on the lookups below.
config_path = os.path.join(head, 'config.ini').replace('\\', '/')
config = ConfigParser()
if not config.read(config_path):
    raise FileNotFoundError('Could not read Spotify config file: ' + config_path)

# Spotify API credentials and account name come from the DEFAULT section.
client_id = config['DEFAULT']['client_id']
client_secret = config['DEFAULT']['client_secret']
redirect_uri = config['DEFAULT']['redirect_uri']
username = config['DEFAULT']['username']

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
# NOTE(review): the playlist helpers in this notebook create and modify
# playlists, which presumably needs a playlist-modify scope in addition to
# 'user-library-read' — confirm before calling them.
scope = 'user-library-read'
token = util.prompt_for_user_token(username, scope, client_id=client_id, client_secret=client_secret, redirect_uri=redirect_uri)
if not token:
    # Assumes prompt_for_user_token returns a falsy value when authorization
    # fails — TODO confirm against the installed spotipy version.
    raise RuntimeError('Could not obtain a Spotify access token for user: ' + username)
sp = spotipy.Spotify(auth=token)

# The playlist helpers read `user_id` as a global, but no other cell in this
# notebook defines it; take it from the authenticated user's profile.
user_id = sp.current_user()['id']

In [3]:
def get_saved_tracks(limit = 50, offset = 0):
    """Collect the current user's saved tracks, one page at a time.

    Parameters
    ----------
    limit : int
        Page size passed to ``sp.current_user_saved_tracks``.
    offset : int
        Index of the first saved track to fetch.

    Returns
    -------
    list of dict
        One dict per track with the keys ``'name'``, ``'artists'`` (artist
        names joined with ``', '``) and ``'track_id'``.
    """
    saved_tracks = [ ]

    while True:
        # each page is requested exactly once; its 'total' tells us when to stop
        saved_tracks_obj = sp.current_user_saved_tracks(limit = limit, offset = offset)
        items = saved_tracks_obj['items']

        # add track information to running list
        for track_obj in items:
            track = track_obj['track']
            saved_tracks.append({
                'name': track['name'],
                'artists': ', '.join([artist['name'] for artist in track['artists']]),
                'track_id': track['id']
            })

        offset += limit

        # an empty page means there is nothing more to read; stopping here
        # also prevents an endless loop when limit is 0
        if not items or offset >= saved_tracks_obj['total']:
            break

    return saved_tracks

def get_audio_features(track_ids):
    """Fetch audio features for ``track_ids`` in consecutive batches of 50.

    Each batch is passed to ``sp.audio_features`` and the returned lists are
    concatenated in request order.

    Returns
    -------
    list
        Whatever ``sp.audio_features`` returned for every batch, flattened
        into a single list (empty when ``track_ids`` is empty).
    """
    batch_starts = range(0, len(track_ids), 50)

    collected = [ ]
    for start in batch_starts:
        batch = track_ids[start:start + 50]
        collected.extend(sp.audio_features(batch))

    return collected

def save_cluster_tracks_to_playlist(playlist_name, track_ids):
    """Replace the contents of the playlist ``playlist_name`` with ``track_ids``.

    The playlist is looked up by name among the user's playlists (via
    ``get_all_user_playlists``) and created as a public playlist when no
    playlist with that name exists. Any tracks already in it are removed,
    then ``track_ids`` are added.

    Parameters
    ----------
    playlist_name : str
        Name of the playlist to create or overwrite.
    track_ids : iterable of str
        Track ids to put into the playlist. An empty iterable leaves the
        playlist empty and sends no add request.

    Notes
    -----
    Relies on the globals ``sp`` (Spotify client) and ``user_id``.
    """
    # Spotify accepts at most 100 tracks per add request, so larger clusters
    # must be sent in batches.
    batch_size = 100

    # get all of the users playlists
    all_playlists = get_all_user_playlists()
    matching_ids = [playlist['id'] for playlist in all_playlists if playlist['name'] == playlist_name]

    # check if playlist already exists
    if not matching_ids:
        playlist = sp.user_playlist_create(user = user_id, name = playlist_name, public = True)
    else:
        playlist = sp.user_playlist(user = user_id, playlist_id = matching_ids[0])

    # bind the id in both branches so later lookups never hit an unbound name
    playlist_id = playlist['id']

    # remove any existing tracks in playlist
    while (playlist['tracks']['total'] > 0):
        removable = [item['track']['id'] for item in playlist['tracks']['items']
                     if item.get('track') and item['track'].get('id')]
        if not removable:
            # remaining entries carry no track id and cannot be removed by id;
            # stop here rather than loop forever
            break
        sp.user_playlist_remove_all_occurrences_of_tracks(user_id, playlist_id, tracks = removable)
        playlist = sp.user_playlist(user = user_id, playlist_id = playlist_id)

    # add tracks from cluster, one batch at a time
    track_ids = list(track_ids)
    for start in range(0, len(track_ids), batch_size):
        sp.user_playlist_add_tracks(user_id, playlist_id = playlist_id,
                                    tracks = track_ids[start:start + batch_size])
    
def get_all_user_playlists(playlist_limit = 50, playlist_offset = 0):
    """Return the name and id of every playlist listed for ``user_id``.

    Pages through ``sp.user_playlists`` starting at ``playlist_offset`` in
    steps of ``playlist_limit``; the total reported by the first page decides
    how many further pages are requested.

    Returns
    -------
    list of dict
        One ``{'name': ..., 'id': ...}`` dict per playlist, in page order.
    """
    collected = []
    cursor = playlist_offset
    total = None

    # the first request always happens; later ones only while playlists remain
    while total is None or cursor < total:
        page = sp.user_playlists(user_id, limit = playlist_limit, offset = cursor)
        if total is None:
            total = page['total']

        for entry in page['items']:
            collected.append({'name': entry['name'], 'id': entry['id']})

        cursor += playlist_limit

    return collected

In [4]:
# Audio-feature columns that are scaled and fed to the clustering model.
FEATURE_KEYS = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
    'duration_ms', 'time_signature',
]

# Pull the user's saved tracks into a frame (name, artists, track_id).
saved_tracks    = get_saved_tracks()
saved_tracks_df = pd.DataFrame(saved_tracks)

# Fetch audio features for those tracks and discard the link/metadata columns.
saved_tracks_audiofeat    = get_audio_features(track_ids = saved_tracks_df['track_id'].tolist())
saved_tracks_audiofeat_df = pd.DataFrame(saved_tracks_audiofeat).drop(
    columns = ['analysis_url', 'track_href', 'type', 'uri']
)

# Attach the audio features to each saved track; the feature frame's 'id'
# duplicates 'track_id' after the join, so it is dropped.
saved_tracks_plus_df = (
    saved_tracks_df
    .merge(saved_tracks_audiofeat_df, how = 'left', left_on = 'track_id', right_on = 'id')
    .drop(columns = 'id')
)

In [8]:
# Standardise the audio-feature columns and keep the track name/artists
# alongside the scaled values (joined back on the row index).
# NOTE(review): the scaler is fit on this library's tracks; if the saved model
# was trained with a different scaler the inputs may not be comparable —
# confirm against the training code.
scaler = StandardScaler()
norm_d = pd.DataFrame(
    scaler.fit_transform(saved_tracks_plus_df[FEATURE_KEYS]),
    columns = FEATURE_KEYS,
).assign(
    name = saved_tracks_plus_df['name'],
    artists = saved_tracks_plus_df['artists'],
)

In [6]:
# Load the previously saved clustering model; the path is relative to the
# current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load model files that come from a trusted source.
loaded_model = joblib.load('webapp/model.sav')

In [11]:
# Assign every track a cluster using the loaded model on the scaled features.
# The +1 shifts each predicted label up by one (presumably so clusters are
# numbered from 1 rather than 0 — confirm).
norm_d['cluster'] = loaded_model.predict(norm_d[FEATURE_KEYS]) + 1

In [12]:
# Show the rows assigned to cluster 14 (hard-coded cluster number for inspection).
norm_d[norm_d['cluster']==14]

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,name,artists,cluster
3,0.297719,-0.966591,0.759389,-0.558074,0.761673,-0.634645,-0.822051,1.280063,-0.543152,-0.057972,0.620565,-0.258590,0.145564,Twinkle,Hether,14
22,0.291034,-1.388783,1.601805,-0.559438,0.761673,-0.700191,0.589640,-0.485218,-0.521376,-1.117132,-1.552069,-0.132874,0.145564,She Don't Wear the Same Clothes,Molokai,14
24,-0.812032,0.328133,0.759389,-2.094182,0.761673,0.315761,0.408847,1.221994,-0.617191,-0.898439,-1.051497,-0.338165,0.145564,Satellites,faang,14
103,-0.357435,0.193032,0.197778,0.465316,0.761673,-0.120583,-0.777606,-0.485250,-0.745670,-0.255225,-0.843065,-1.072904,0.145564,Far Rockaway,KAMAUU,14
113,-0.297268,0.187403,1.601805,0.490901,0.761673,-0.593446,-0.767059,-0.271171,-0.274579,-0.915591,0.770236,-1.869371,0.145564,Formula,Labrinth,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1681,-1.246574,-1.034141,1.040194,-0.272889,0.761673,-0.721727,0.909794,-0.441122,-0.618643,-1.104267,-0.661545,0.714873,0.145564,Dory Previn,Camera Obscura,14
1695,2.055940,-0.994737,0.197778,-0.827225,0.761673,-0.291001,-0.800205,-0.483621,-0.683971,-0.426748,-0.052989,0.206508,0.145564,Down Below,Roddy Ricch,14
1712,-0.196989,-1.152355,1.321000,-0.009196,0.761673,-0.325646,-0.780996,-0.485255,-0.612110,-1.160013,0.377783,0.165031,0.145564,Scott and Ramona,Lil Uzi Vert,14
1726,0.083791,0.660258,0.759389,1.182030,0.761673,-0.612173,-0.923295,-0.485239,-1.054167,-0.791237,-0.086638,0.411767,0.145564,Hollow Life,Coast Modern,14


In [13]:
# Keep only the identifying columns and the assigned cluster for each track.
songs = norm_d[['name','artists','cluster']]

In [37]:
# Take the 'artists' value of the first rows of each cluster (.head() defaults
# to 5 rows per group). The result is a Series that keeps the original row
# index; it is not one row per cluster.
# NOTE(review): as_index=False looks like it has no effect on .head() — confirm.
grouped = songs.groupby(['cluster'], as_index=False)['artists'].head()
grouped

0                    Moi Je, Petit Biscuit
1                  callmestevieray, Melkin
2                       Felly, Arden Jones
3                                   Hether
4                                    Healy
                       ...                
638                                  Crumb
1147                       Duffle Bag Buru
1210                                  Teho
1363                Carbon Based Lifeforms
1565    Circles Around The Sun, Neal Casal
Name: artists, Length: 100, dtype: object

In [25]:
# NOTE(review): playlist_dict is not defined in any cell shown in this
# notebook; it presumably came from a cell that was deleted or run out of
# order — confirm, since this cell would fail on Restart & Run All.
playlist_dict

{(1, 'ANNA'): ['Impression - Original Mix'],
 (1, 'Ante Perry'): ['Daft Perry - Original'],
 (1, 'Bassnectar'): ['Timestretch'],
 (1, 'Berhana'): ['California'],
 (1, 'Blood Cultures'): ['Flowers for All Occasions'],
 (1, 'Boris Brejcha'): ['Purple Noise'],
 (1, 'COIN'): ['Malibu 1992'],
 (1, 'Cherub'): ['Doses & Mimosas'],
 (1, 'Creedence Clearwater Revival'): ['I Heard It Through The Grapevine'],
 (1, 'Current Joys'): ['A Different Age', 'My Motorcycle'],
 (1, 'Deal Casino'): ['bangbangbang'],
 (1, 'Derek & The Dominos, Duane Allman'): ['Layla'],
 (1, 'GHOST DATA'): ['Angelic Layer'],
 (1, 'Gary Clark Jr.'): ['When My Train Pulls In'],
 (1, 'Getter, Joji'): ['On My Way Out'],
 (1, 'Jaden'): ['Ninety'],
 (1, 'Joyhauser'): ['C166W'],
 (1, 'Kanye West'): ['Jail', 'Jesus Lord'],
 (1, 'Kanye West, Pusha T'): ['Runaway'],
 (1, 'King Gizzard & The Lizard Wizard'): ['Rattlesnake'],
 (1, 'Lena Raine'): ['Resurrections'],
 (1, 'Lil Wayne'): ['Let It All Work Out'],
 (1, 'Lynyrd Skynyrd'): ['Fr

In [None]:
def create_playlist_chart(cluster_name):
    """Build a 1-indexed artist/song table for one cluster grouping.

    Reads the global frame ``df`` and keeps the rows whose
    ``'cluster_grouping'`` equals ``cluster_name``.
    NOTE(review): ``df`` is not defined in any cell shown in this notebook —
    confirm where it comes from.

    Returns
    -------
    pandas.DataFrame
        The ``'artist'`` and ``'song_names'`` columns of the matching rows,
        indexed 1..n.
    """
    wanted_columns = ['artist', 'song_names']
    row_mask = df['cluster_grouping'] == cluster_name

    chart = df.loc[row_mask, wanted_columns]
    chart.index = range(1, chart.shape[0] + 1)
    return chart