In [2]:
import pandas as pd
import numpy as np
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from annoy import AnnoyIndex
from datetime import datetime

In [39]:
def get_nn(t, songs):
    nearest_neighbors = list()
    song_features = np.array(get_features(songs)[['danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo']])
    for s in song_features:
        nearest_neighbors.append(t.get_nns_by_vector(s, 5, search_k=-1, include_distances=True))
        
    df_nearest_neighbors_indices = pd.DataFrame()
    for nn in nearest_neighbors:
        # unpacks tuple and appends to df
        df_nearest_neighbors_indices = df_nearest_neighbors_indices.append(
            pd.DataFrame.from_dict(dict(zip(nn[0], nn[1])), 
                                   orient='index'))
    df_nearest_neighbors_indices = (
        df_nearest_neighbors_indices
        .reset_index()
        .groupby('index', as_index=False)
        .min()
        .rename(columns={"index":"song_index", 0:"distance"})
    )    
    # for each song in songs, we get a few close songs. Return all of them?
    return df_nearest_neighbors_indices

In [17]:
def recurse_playlist(id):
    r = sp.playlist(id)
    playlists.append(r)
    if 'tracks' in r:
        if 'next' in r['tracks']:
            if r['tracks']['next'] is not None:
                recurse_playlist(str.replace(r['tracks']['next'], 'https://api.spotify.com/v1/playlists/', ''))
    elif 'next' in r:
        if r['next'] is not None:
            recurse_playlist(str.replace(r['next'], 'https://api.spotify.com/v1/playlists/', ''))
    else:
        pass

In [4]:
def get_metadata(ids):
    metadata = list()
    # 50 limit on api call    
    for i in range(0,len(ids),50):
        meta = sp.tracks(ids[i:i+50])
        for data in meta['tracks']:
            artists = list()
            if data is not None:
                for artist in data['artists']:
                    artists.append(artist['id'])
                metadata.append({"song_id":data['id'],  
                                 "artists":artists, 
                                 "song_name":data['name'], 
                                 "album_id":data['album']['id'],
                                 "album_name":data['album']['name'],
                                 "popularity":data['popularity']})
    return pd.DataFrame(metadata)

In [44]:
def build_annoy_index(array, trees=10):
    # vector size is feature length, idk wtf angular does
    t = AnnoyIndex(11, 'angular')
    for i in range(len(array)):
        t.add_item(i, selected_features[i])
    # once this is built, you can't add items to it. Should we build it somewhere else?
    t.build(trees)
    
    return t

In [41]:
def create_playlist(songs):
    user_id = sp.me()['id']
    timestampStr = datetime.now().strftime("%d-%b-%Y (%H:%M:%S.%f)")
    sp.user_playlist_create(user_id, f'Customized Discover Weekly - {timestampStr}')
    # We don't get the playlist id when we create it, so we take the user's first 50 playlists, then find the name we gave it
    playlists = sp.user_playlists(sp.me()['id'])['items']
    playlist_names = {}
    for i in playlists:
        playlist_names[i['name']] = i['id']
    playlist_id = playlist_names[f'Customized Discover Weekly - {timestampStr}']
    sp.playlist_add_items(playlist_id, songs)

In [40]:
def apply_filtering_criteria(songs_df):
    # stick a bunch of rules on there to get us to 30 songs
    # right now let's just take top 30
    return songs_df.sort_values(by='distance', ascending=False).head(30)[['song_index']]
    

In [5]:
def get_features(ids):
    features = []
    # 50 limit on api call
    for i in range(0,len(ids),50):
        audio_features = sp.audio_features(ids[i:i+50])
        for track in audio_features:
            if track is not None:
                features.append(track)
    
    return pd.DataFrame(features)

In [6]:
def recurse_saved_songs(offset=0):
    r = sp.current_user_saved_tracks(20, offset)
    for i in r['items']:
        songs.append({"id":i['track']['id'], "added_at":i['added_at']})
    if r['next']:
        offset+=20
        recurse_saved_songs(offset)

In [7]:
cid ="0deb154cdea34cfa9c50fc76938403b9" 
secret = "6aa3c3ee390d4421bdc6a860cf33c686"
username = "Ben"

In [8]:
credentials = SpotifyClientCredentials(
        client_id=cid,
        client_secret=secret)

In [9]:
sp = spotipy.Spotify(auth_manager=credentials)

In [10]:
user_ids = [
    'kirstendodo',
    '1260745480',
    '1255526461',
    'athenajiang',
    '1239664093',
    '1268639225',
    '22eja5lr4xyqrmdgb4jox74pi',
    'russellkim98',
    '1213055384',
    '12563360',
    '1218288200',
    '22vxpm5y7bws42rgecwwdtqzq',
    '2lb6yucjb2mmlbpqvv7f6tpa',
    'hahowie',
    '1233302457',
    'clairehuangg',
    'gmfrlife',
    'karawho',
    '1210134132',
    'gracehuang22',
    'sangeethaisrofl',
    '124028238',
    '1228739853',
    '1249053478',
    '1249402231',
    '1261634901',
    'jasonliao',
    '12101413836',
    '227lrr7ufyoef3thjhnzjcbhi',
    '1263186670',
    '126549512',
    '22tbkcbk3npj7dmozb5xcvegq',
    '1258256267',
    'zenbhang',
    '1249768840',
    '1251291991',
    '1293249673',
    '12184871363',
    '1225107231',
    '21jm5sg2qelg57la434krzpjy',
    '22n5d3jkyzx6ucmdh6stknedi'
]

In [11]:
playlist_ids = set()
for ui in user_ids:
    try:
        r = sp.user_playlists(ui, limit=50)
        for i in r['items']:
            playlist_ids.add(i['id'])
    except:
        print(ui, 'error')

HTTP Error for GET to https://api.spotify.com/v1/users/2lb6yucjb2mmlbpqvv7f6tpa/playlists with Params: {'limit': 50, 'offset': 0} returned 404 due to Not found.


2lb6yucjb2mmlbpqvv7f6tpa error


In [18]:
# This chunk takes a while, possible that there's a better way to do this
playlists = list()
for i in playlist_ids:
    recurse_playlist(str.replace(i, 'https://open.spotify.com/playlist/', ''))

In [19]:
# This chunk is instantaneous
ids = set()
for p in playlists:  
    if 'tracks' in p:
        for item in p['tracks']['items']:
            try:
                if item['track']['id'] is not None:
                    ids.add(item['track']['id'])
            except:
                pass
    elif 'items' in p:
        for item in p['items']:
            try:
                if item['track']['id'] is not None:
                    ids.add(item['track']['id'])
            except:
                pass

In [22]:
df_metadata = get_metadata(list(ids))

In [24]:
df_features = get_features(list(ids))
# We probably just want to build a db with all of these song + features

In [25]:
len(df_metadata)

41109

In [27]:
df_metadata.head()

Unnamed: 0,song_id,artists,song_name,album_id,album_name,popularity
0,6OZoT8AhDTz57JY2UU5J7n,[5E2rtn57BM2WPjwak4kGd5],One Armed Scissor,33h4FVCtfR6FUpFyd2yLNO,Relationship Of Command,0
1,5j7ixaLeGTGSv4DzKs0pCM,[4UK2Lzi6fBfUi9rpDt6cik],だから僕は音楽を辞めた,4b9nOSXSf1LROzgfYFxdxI,だから僕は音楽を辞めた,62
2,2LFprH1a4nNvmYha55xhj6,[7blXVKBSxdFZsIqlhdViKc],sucks to see you doing better,2W8nO4x2KlHc3z3xxe23PW,sucks to see you doing better (extended),54
3,6mQpCpdsCp4uBD4n9fyBuF,[7o7mC95EDbJKTcPAAs8C3r],Touch the Sky,5jZz8NsXPLu7eSUkye6jpa,The Night Is Young,29
4,0sYfwwEy0UyNizk6na4zGm,[12trz2INGglrKMzLmg0y2C],Just Like You,6q8BNcH6wkWwWC0fGoJwkS,The Juice: Vol. II,55


In [28]:
# We lost a bunch of rows apparently? Weird
# df_metadata.loc[[5773, 2176, 54482, 35915, 40399, 20884, 9975, 20049, 21284, 35395, 17865, 18383, 7704, 7742, 20480, 26160, 9222, 8400, 44129, 35589, 7778, 18590, 3200, 25901, 55937],:]

# User Features

In [29]:
scope = ['user-library-read', "playlist-modify-public"]

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id = "0deb154cdea34cfa9c50fc76938403b9",
                                               client_secret = secret,
                                               redirect_uri= 'http://127.0.0.1:5000/spotify/callback',
                                               scope=scope,
                                              open_browser=True))

In [35]:
songs = list()
recurse_saved_songs()

df_your_songs = pd.DataFrame(songs)

df_your_songs.head()

In [46]:
selected_features = np.array(df_features[['danceability','energy','key','loudness','mode','speechiness','acousticness','instrumentalness','liveness','valence','tempo']])

index = build_annoy_index(selected_features)
index.save('data/test.ann')

In [48]:
df_nearest_neighbors_indices = get_nn(index, df_your_songs.head()['id'])

  df_nearest_neighbors_indices = df_nearest_neighbors_indices.append(
  df_nearest_neighbors_indices = df_nearest_neighbors_indices.append(
  df_nearest_neighbors_indices = df_nearest_neighbors_indices.append(
  df_nearest_neighbors_indices = df_nearest_neighbors_indices.append(
  df_nearest_neighbors_indices = df_nearest_neighbors_indices.append(


In [50]:
df_nearest_neighbors_indices.head()

Unnamed: 0,song_index,distance
0,927,0.003738
1,1071,0.001945
2,1681,0.002409
3,2422,0.001677
4,3466,0.001459


In [51]:
df_filtered_songs = apply_filtering_criteria(df_nearest_neighbors_indices)

In [60]:
output_songs = df_metadata.iloc[list(df_filtered_songs['song_index']), :][['song_id']]

In [61]:
output_songs.head()

Unnamed: 0,song_id
12051,58AGoOGbwsQMhBbH0eFLRR
39613,2TWqTeDjVpcZfgrblmf86g
927,7IRyfeJIBmi08GYgEZwWMM
16448,7CFPVdKn4FFkIm7rZYhNZQ
13944,2QhURnm7mQDxBb5jWkbDug


In [56]:
# index can just be the integer : song id mapping LMFAO
df = df_features.reset_index()

In [None]:
# select song ids that are not already in 
# worry about this later
# right now all we need to do is get the feature vectors for the songs you've loaded and check for nearest

In [None]:

# annoy is basically
# [1: [feature1, feature2, feature3]]
# it only supporst integers as identifiers for items, so will have to map
# t = AnnoyIndex(f, 'angular')  # Length of item vector that will be indexed
#     t.add_item(i, v)
# t.build(10) # 10 trees
# print(t.get_nns_by_item(0, 1000)) # will find the 1000 nearest neighbors

In [None]:
def get_songs_for_nn():
# Apply some sampling criteria to get a bunch of songs based on the user's last X days of activity (sampling? most recent)
    
    return pass

In [57]:
len(songs)

25

In [62]:
create_playlist(list(songs['song_id']))

In [None]:
playlists = sp.user_playlists(sp.me()['id'])['items']

In [None]:
playlist_names = {}
for i in playlists:
    playlist_names[i['name']] = i['id']

In [None]:
np.random.randint(100,101 + 100,size=(100, 1))/100

In [None]:
if type(3) == int:
    print('i')