# SpotiFinder


---

### Spotify request code

In [15]:
import sys
import json
import timeit
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv


class SpotiFinder():
    '''Small helper around spotipy for reading one user's Spotify data.

    Call ``authenticate`` first: every other method builds its spotipy client
    from the token that ``authenticate`` stores on the instance.
    '''
    # Runs once, when the class body is evaluated.
    # NOTE(review): presumably this makes the Spotify app credentials from a
    # local .env file available to spotipy -- confirm.
    load_dotenv()

    # Values used by ``authenticate`` unless the caller passes a username.
    DEFAULT_USERNAME = 'a4kqw8b9sws8zwd5bfjdjy4zk'
    DEFAULT_SCOPE    = 'user-library-read'

    def __init__(self):
        self.sp       = None
        self.token    = None
        self.username = None

    def _client(self):
        '''Build a spotipy client authorised with the currently stored token.'''
        return spotipy.Spotify(auth = self.token)

    def authenticate(self, username = None):
        '''Request a user token and store it on the instance.

        The username needs to be the one written on the top right of Spotify.
        There is no known way to get this info without asking the user for it.

        Parameters
        ----------
        username : str, optional
            Spotify username to authenticate as. Defaults to
            ``DEFAULT_USERNAME`` so existing ``authenticate()`` calls keep
            their behaviour.
        '''
        self.username = username if username is not None else self.DEFAULT_USERNAME
        self.token    = util.prompt_for_user_token(self.username
                                                  ,self.DEFAULT_SCOPE
                                                  )

    def get_saved_tracks(self, limit = 20):
        '''Return the track ids from one page of the user's saved tracks.

        Parameters
        ----------
        limit : int, optional
            Number of saved tracks to request (passed straight to
            ``current_user_saved_tracks``). Defaults to 20.

        Returns
        -------
        list
            The ``id`` of every track in the returned page, in response order.
        '''
        saved_tracks = self._client().current_user_saved_tracks(limit = limit)
        return [entry['track']['id'] for entry in saved_tracks['items']]

    def song_features(self, track_id_list):
        '''Takes in a list of track ids and returns a feature list for use with our nearest neighbors model.

        The ids are sent to ``audio_features`` in batches of 100. Tracks for
        which the API returns ``None`` are counted and left out of the result.

        Returns
        -------
        list or None
            One audio-feature dict per track that had features, or ``None``
            (after printing a message) when no token has been stored yet.
        '''
        if not self.token:
            print('Can\'t get token for', self.username)
            return None

        sp           = self._client()
        start        = timeit.default_timer()
        rows         = []
        batchsize    = 100
        None_counter = 0

        for offset in range(0, len(track_id_list), batchsize):
            batch = track_id_list[offset: offset + batchsize]

            for features in sp.audio_features(batch):
                if features is None:
                    None_counter += 1
                else:
                    rows.append(features)

        print('Number of tracks where no audio features were available:', None_counter)
        stop = timeit.default_timer()
        print('Time to run this code (in seconds):', stop - start)
        return rows

    def get_saved_playlists(self, limit = 50):
        '''Print and return one page of the authenticated user's playlists.

        Parameters
        ----------
        limit : int, optional
            Number of playlists to request (passed straight to
            ``user_playlists``). Defaults to 50.

        Returns
        -------
        list
            The playlist objects from the ``items`` field of the response.
        '''
        playlists = self._client().user_playlists(self.username, limit = limit)

        for playlist in playlists['items']:
            print(playlist['name'])
            print (f"Total tracks: {playlist['tracks']['total']}")

        return playlists['items']

    def track_lookup(self, track_ids):
        '''Return the track object for ``track_ids``.

        The value is handed unchanged to ``spotipy.Spotify.track``.
        NOTE(review): despite the plural name this looks like a single-track
        lookup -- confirm against the spotipy reference.
        '''
        return self._client().track(track_ids)
            
# Create the wrapper and request a user token so that `spot.token` is set
# before any of the API cells below are run.
spot = SpotiFinder()
spot.authenticate()

In [23]:
# Show the 'tracks' entry of the first playlist.
# `playlists` is assigned by the get_saved_playlists cell (In [9]), which must run first.
playlists[0]['tracks']

{'href': 'https://api.spotify.com/v1/playlists/5jOsMhFoRXu8sL9cz8zzQq/tracks',
 'total': 1}

In [19]:
# NOTE(review): this id is the value shown for playlists[0]['id'], i.e. a playlist id,
# which would explain the 404 "non existing id" returned by the tracks endpoint.
spot.track_lookup('5jOsMhFoRXu8sL9cz8zzQq')

SpotifyException: http status: 404, code:-1 - https://api.spotify.com/v1/tracks/5jOsMhFoRXu8sL9cz8zzQq:
 non existing id

In [9]:
# Request playlists with limit = 50; the method prints each name and track count
# and returns the list of playlist objects.
playlists = spot.get_saved_playlists(limit = 50)


My playlist #2
Total tracks: 1


In [10]:
# Id of the first playlist in `playlists`.
playlists[0]['id']

'5jOsMhFoRXu8sL9cz8zzQq'

In [117]:
# Print the name of every track entry under 'items'.
# NOTE(review): get_saved_playlists returns a list, so `playlists['items']` only works
# if `playlists` was rebound to a dict of playlist-track items before this ran -- confirm.
for thing in playlists['items']:
    print(thing['track']['name'])

Danger Zone


In [59]:
# Request two saved tracks and display the result.
# NOTE(review): the captured output is a raw response dict, while get_saved_tracks as
# defined in this notebook returns a list of ids -- the output looks stale.
tracks = spot.get_saved_tracks(limit = 2)
tracks

{'href': 'https://api.spotify.com/v1/me/tracks?offset=0&limit=2',
 'items': [{'added_at': '2020-03-03T01:29:48Z',
   'track': {'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/13ab1LgQZ3tQOhkDRRYB8Y'},
       'href': 'https://api.spotify.com/v1/artists/13ab1LgQZ3tQOhkDRRYB8Y',
       'id': '13ab1LgQZ3tQOhkDRRYB8Y',
       'name': 'Mick Gordon',
       'type': 'artist',
       'uri': 'spotify:artist:13ab1LgQZ3tQOhkDRRYB8Y'}],
     'available_markets': ['AD',
      'AE',
      'AR',
      'AT',
      'AU',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'CA',
      'CH',
      'CL',
      'CO',
      'CR',
      'CY',
      'CZ',
      'DE',
      'DK',
      'DO',
      'DZ',
      'EC',
      'EE',
      'EG',
      'ES',
      'FI',
      'FR',
      'GB',
      'GR',
      'GT',
      'HK',
      'HN',
      'HU',
      'ID',
      'IE',
      'IL',
      'IN',
      'IS',
      'IT',
      'JO',
      

In [64]:
# Show the 'items' list of the saved-tracks response.
# NOTE(review): needs `tracks` to be the raw response dict, not the list of ids.
tracks['items']

[{'added_at': '2020-03-03T01:29:48Z',
  'track': {'album': {'album_type': 'album',
    'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/13ab1LgQZ3tQOhkDRRYB8Y'},
      'href': 'https://api.spotify.com/v1/artists/13ab1LgQZ3tQOhkDRRYB8Y',
      'id': '13ab1LgQZ3tQOhkDRRYB8Y',
      'name': 'Mick Gordon',
      'type': 'artist',
      'uri': 'spotify:artist:13ab1LgQZ3tQOhkDRRYB8Y'}],
    'available_markets': ['AD',
     'AE',
     'AR',
     'AT',
     'AU',
     'BE',
     'BG',
     'BH',
     'BO',
     'BR',
     'CA',
     'CH',
     'CL',
     'CO',
     'CR',
     'CY',
     'CZ',
     'DE',
     'DK',
     'DO',
     'DZ',
     'EC',
     'EE',
     'EG',
     'ES',
     'FI',
     'FR',
     'GB',
     'GR',
     'GT',
     'HK',
     'HN',
     'HU',
     'ID',
     'IE',
     'IL',
     'IN',
     'IS',
     'IT',
     'JO',
     'JP',
     'KW',
     'LB',
     'LI',
     'LT',
     'LU',
     'LV',
     'MA',
     'MC',
     'MT',
     'MX',
     'MY

In [55]:
# First entry of the saved-tracks 'items' list (the subscript needs an index to parse).
tracks['items'][0]

IndexError: list index out of range

In [83]:
# Fetch audio features for the ids in `tracks` and show the first feature dict.
new_tracks = spot.song_features(tracks)
new_tracks[0]

Number of tracks where no audio features were available: 0
Time to run this code (in seconds): 0.1298514999998588


{'danceability': 0.284,
 'energy': 0.722,
 'key': 7,
 'loudness': -8.284,
 'mode': 1,
 'speechiness': 0.0559,
 'acousticness': 0.00461,
 'instrumentalness': 0.873,
 'liveness': 0.163,
 'valence': 0.0383,
 'tempo': 116.292,
 'type': 'audio_features',
 'id': '4COR2ZPEyUn0lsbAouRWxA',
 'uri': 'spotify:track:4COR2ZPEyUn0lsbAouRWxA',
 'track_href': 'https://api.spotify.com/v1/tracks/4COR2ZPEyUn0lsbAouRWxA',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/4COR2ZPEyUn0lsbAouRWxA',
 'duration_ms': 506763,
 'time_signature': 3}

In [69]:
# Writes the 'track' object of the first saved-tracks item to tracks.json
# (only that one track, not the whole list).
with open('tracks.json', 'w') as outfile:
    json.dump(tracks['items'][0]['track'], outfile)

In [2]:
#     if token:
#         sp        = spotipy.Spotify(auth = token)
#         playlists = sp.user_playlists(username)
        
#         for playlist in playlists['items']:
#             if playlist['owner']['id'] == username:
#                 print()
#                 print(playlist['name'])
#                 print ('  total tracks', playlist['tracks']['total'])
#                 results = sp.playlist(playlist['id'], fields = "tracks,next")
#                 tracks  = results['tracks']
#                 show_tracks(tracks)
#                 while tracks['next']:
#                     tracks = sp.next(tracks)
#                     show_tracks(tracks)
#     else:
#         print("Can't get token for", username)

In [14]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb


# NOTE(review): `data` is not defined in any visible cell (the recorded run ended in a
# NameError); it has to be loaded before this cell -- confirm its source.
list_of_results      = data[0]["items"]
list_of_artist_names = []
list_of_artist_uri   = []
list_of_song_names   = []
list_of_song_uri     = []
list_of_durations_ms = []
list_of_explicit     = []
list_of_albums       = []
list_of_popularity   = []

# Flatten every track object into the parallel column lists above.
for result in list_of_results:
    # Only the first listed artist of a track is recorded.
    primary_artist = result["artists"][0]

    list_of_artist_names.append(primary_artist["name"])
    list_of_artist_uri.append(primary_artist["uri"])
    list_of_song_names.append(result["name"])
    list_of_song_uri.append(result["uri"])
    list_of_durations_ms.append(result["duration_ms"])
    list_of_explicit.append(result["explicit"])
    list_of_albums.append(result["album"]["name"])
    list_of_popularity.append(result["popularity"])

NameError: name 'data' is not defined

In [None]:
# Assemble the per-track lists built in the previous cell into one DataFrame,
# one column per list.
all_songs = pd.DataFrame(
    {'artist': list_of_artist_names,
     'artist_uri': list_of_artist_uri,
     'song': list_of_song_names,
     'song_uri': list_of_song_uri,
     'duration_ms': list_of_durations_ms,
     'explicit': list_of_explicit,
     'album': list_of_albums,
     'popularity': list_of_popularity
     
    })

# Write the table to top50_songs.csv.
# NOTE(review): pandas documents to_csv as returning None when a path is given, so
# `all_songs_saved` most likely holds None rather than the saved data -- confirm.
all_songs_saved = all_songs.to_csv('top50_songs.csv')

In [8]:
# Build a spotipy client from app credentials instead of a user token.
# NOTE(review): `Client` and `Secret` are not defined in any visible cell -- confirm
# where they come from, and keep their values out of the notebook itself.
client_credentials_manager = SpotifyClientCredentials(client_id     = Client
                                                     ,client_secret = Secret
                                                     )
sp                         = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [24]:
# timeit library to measure the time needed to run this code
import timeit
start = timeit.default_timer()

# create empty lists where the results are going to be stored
artist_name = []
track_name  = []
popularity  = []
track_id    = []

# Page through the search results 50 tracks at a time, for offsets 0 .. 9950.
for offset in range(0, 10000, 50):
    track_results = sp.search(q      = 'year:2018'
                             ,type   = 'track'
                             ,limit  = 50
                             ,offset = offset
                             )
    for t in track_results['tracks']['items']:
        # Only the first listed artist of each track is kept.
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])


stop = timeit.default_timer()
print ('Time to run this code (in seconds):', stop - start)

NameError: name 'sp' is not defined

### Song features

In [25]:
import timeit


start        = timeit.default_timer()
rows         = []
batchsize    = 100
None_counter = 0

# One row per track found by the search cell.
df_tracks    = pd.DataFrame({'artist_name': artist_name
                            ,'track_name' : track_name
                            ,'track_id'   : track_id
                            ,'popularity' : popularity
                            }
                           )

# Request audio features `batchsize` ids at a time.
for iterator in range(0, len(df_tracks['track_id']), batchsize):

    batch           = df_tracks['track_id'][iterator: iterator + batchsize]
    feature_results = sp.audio_features(batch)

    # The loop variable is deliberately not called `features`: that name holds the
    # list of model input columns elsewhere in this notebook and must not be clobbered.
    for track_features in feature_results:
        if track_features is None:
            None_counter += 1
        else:
            rows.append(track_features)

print('Number of tracks where no audio features were available:', None_counter)

stop = timeit.default_timer()
print ('Time to run this code (in seconds):', stop - start)

Number of tracks where no audio features were available: 0
Time to run this code (in seconds): 0.002217700000073819


In [26]:
# First collected audio-feature dict.
# NOTE(review): the recorded output is an IndexError, i.e. `rows` was empty when this ran.
rows[0]

IndexError: list index out of range

## DataFrame setup

In [59]:
import numpy as np
import pandas as pd
from pandas_profiling import ProfileReport
from sklearn.model_selection import train_test_split


# Read the audio-features table from the published CSV.
df = pd.read_csv('http://www.zernach.com/wp-content/uploads/2020/02/SpotifyAudioFeaturesApril2019.csv')

# Profiling report, currently disabled.
# profile = ProfileReport(df
#                        ,title = 'Pandas Profiling Report'
#                        ,html  = {'style':{'full_width':True}}
#                        )

# `target` is the identifier column; `features` are the audio columns fed to the model.
target   = 'track_id'
features = [
    'acousticness', 'danceability', 'energy', 'instrumentalness',
    'key', 'liveness', 'loudness', 'mode',
    'speechiness', 'tempo', 'valence',
]

X, y = df[features], df[target]

X.head()

# Train/test split, currently disabled.
# X_train, X_test, y_train, y_test = train_test_split(X
#                                                    ,y
#                                                    ,train_size   = 0.8
#                                                    ,random_state = 6
#                                                    )
# X_train.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,valence
0,0.00582,0.743,0.339,0.0,1,0.0812,-7.678,1,0.409,203.927,0.118
1,0.0244,0.846,0.557,0.0,8,0.286,-7.259,1,0.457,159.009,0.371
2,0.025,0.603,0.723,0.0,9,0.0824,-5.89,0,0.0454,114.966,0.382
3,0.0294,0.8,0.579,0.912,5,0.0994,-12.118,0,0.0701,123.003,0.641
4,3.5e-05,0.783,0.792,0.878,7,0.0332,-10.277,1,0.0661,120.047,0.928


### Nearest Neighbors

In [67]:
# Load
import joblib


# Restore the model object that the "Save" cell writes to the same file name.
# NOTE(review): joblib files are pickle-based, so only load files you created yourself.
filename = 'Nearest_Neighbors.sav'
nn       = joblib.load(filename)
print('Model Loaded.')

Model Loaded.


In [37]:
from sklearn.neighbors import NearestNeighbors


# Fit on the audio-feature matrix X
nearest = 10

# Each query returns `nearest` neighbours, searched with a kd-tree.
nn = NearestNeighbors(n_neighbors = nearest
                     ,algorithm   = 'kd_tree'
                     )
# NOTE(review): NearestNeighbors is unsupervised; per the scikit-learn docs the `y`
# argument of fit is ignored -- confirm and consider dropping it.
nn.fit(X, y)

NearestNeighbors(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                 radius=1.0)

In [39]:
# Save
import joblib


# Persist the fitted model so the "Load" cell can restore it without refitting.
filename = 'Nearest_Neighbors.sav'
joblib.dump(nn, filename)
print('Model saved!')

Model saved!


In [48]:
# Neighbour row positions (second element of the kneighbors result) for the first
# row of `track`. `track` is assigned in the recommendation cell (In [40]).
nn.kneighbors(track)[1][0]

array([130613, 100200,  30048,  32334, 122999, 112293, 124596, 111243,
        94703, 114968], dtype=int64)

In [40]:
import json


# Track ids of the last 50 rows of the dataset, used here as example input.
track_list = df['track_id'].tail(50)

# Get track features
# This can be replaced with a spotipy request or a specific track from a user as long as it contains the features.
track = df[df['track_id'].isin(track_list)][features]

# This returns a dataframe of the top 10 most likely songs to recommend.
# NOTE(review): `track` holds the rows for all ids in `track_list`, but `[1][0]` keeps
# only the neighbours of its first row -- confirm that this is intended.
predictions      = nn.kneighbors(track)[1][0]
df_top_similar   = df.iloc[predictions]

# This converts our dataframe to json
json_top_similar = json.loads(df_top_similar.to_json())


# To save as a json file
# df_top_similar.to_json(r'./predict.json')

In [24]:
import sys
import json
import timeit
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv

In [33]:
# Same token request as SpotiFinder.authenticate, done at module level so that a bare
# spotipy client `sp` is available to the cells below.
username      = 'a4kqw8b9sws8zwd5bfjdjy4zk'
scope         = 'user-library-read'
token         = util.prompt_for_user_token(username
                                          ,scope
                                          )

sp            = spotipy.Spotify(auth = token)

In [38]:
# Fetch and display the full track object for one track id.
sp.track('5soMJpcVhSrGrB4prvPL6P')

{'album': {'album_type': 'album',
  'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/13ab1LgQZ3tQOhkDRRYB8Y'},
    'href': 'https://api.spotify.com/v1/artists/13ab1LgQZ3tQOhkDRRYB8Y',
    'id': '13ab1LgQZ3tQOhkDRRYB8Y',
    'name': 'Mick Gordon',
    'type': 'artist',
    'uri': 'spotify:artist:13ab1LgQZ3tQOhkDRRYB8Y'}],
  'available_markets': ['AD',
   'AE',
   'AR',
   'AT',
   'AU',
   'BE',
   'BG',
   'BH',
   'BO',
   'BR',
   'CA',
   'CH',
   'CL',
   'CO',
   'CR',
   'CY',
   'CZ',
   'DE',
   'DK',
   'DO',
   'DZ',
   'EC',
   'EE',
   'EG',
   'ES',
   'FI',
   'FR',
   'GB',
   'GR',
   'GT',
   'HK',
   'HN',
   'HU',
   'ID',
   'IE',
   'IL',
   'IN',
   'IS',
   'IT',
   'JO',
   'JP',
   'KW',
   'LB',
   'LI',
   'LT',
   'LU',
   'LV',
   'MA',
   'MC',
   'MT',
   'MX',
   'MY',
   'NI',
   'NL',
   'NO',
   'NZ',
   'OM',
   'PA',
   'PE',
   'PH',
   'PL',
   'PS',
   'PT',
   'PY',
   'QA',
   'RO',
   'SA',
   'SE',
   'SG',
   'SK',


In [60]:
# Show every column of the first row of the audio-features table.
df.iloc[0]

artist_name                                                     YG
track_id                                    2RM4jf1Xa9zPgMGRDiht8O
track_name          Big Bank feat. 2 Chainz, Big Sean, Nicki Minaj
acousticness                                               0.00582
danceability                                                 0.743
duration_ms                                                 238373
energy                                                       0.339
instrumentalness                                                 0
key                                                              1
liveness                                                    0.0812
loudness                                                    -7.678
mode                                                             1
speechiness                                                  0.409
tempo                                                      203.927
time_signature                                                

In [106]:
spot   = SpotiFinder()
# A fresh instance has no token yet, so authenticate before calling the API.
spot.authenticate()
# Pass the page size explicitly.
tracks = spot.get_saved_tracks(limit = 20)
tracks

['4COR2ZPEyUn0lsbAouRWxA',
 '0XhcbuMPBRRcaqRiHMrnKl',
 '5MAPt0beSbEQdPM5SfJwyl',
 '5LfS278Zsmem9pmxmBXgix',
 '5ZGtjzyg3F6XftEyllHDnC',
 '6gHsenqqwzj67gp1OzCIi2',
 '3KtFA8wHjyg44eVtiI4QqE']

In [61]:
# Show every column of the row at position 44645.
df.iloc[44645]

artist_name                                     Mike Mago
track_id                           4tey3xuewWhJUDyV7A5RfS
track_name          Dangerous Behaviour - Tom Ferry Remix
acousticness                                      0.00602
danceability                                        0.675
duration_ms                                        151179
energy                                              0.763
instrumentalness                                  0.00238
key                                                     8
liveness                                            0.223
loudness                                           -5.807
mode                                                    0
speechiness                                        0.0385
tempo                                             120.034
time_signature                                          4
valence                                             0.693
popularity                                             34
Name: 44645, d

In [5]:
# A single track id kept as a plain string (it also appears in the saved-tracks output).
tracky = '0XhcbuMPBRRcaqRiHMrnKl'

In [6]:
# Passing one id string works here: the string is shorter than the batch size, so the
# slice inside song_features hands the whole string to audio_features in a single call.
spot.song_features(tracky)

Number of tracks where no audio features were available: 0
Time to run this code (in seconds): 0.11925030000000447


[{'danceability': 0.544,
  'energy': 0.912,
  'key': 1,
  'loudness': -5.515,
  'mode': 1,
  'speechiness': 0.0743,
  'acousticness': 0.00757,
  'instrumentalness': 5.64e-05,
  'liveness': 0.0946,
  'valence': 0.794,
  'tempo': 157.603,
  'type': 'audio_features',
  'id': '0XhcbuMPBRRcaqRiHMrnKl',
  'uri': 'spotify:track:0XhcbuMPBRRcaqRiHMrnKl',
  'track_href': 'https://api.spotify.com/v1/tracks/0XhcbuMPBRRcaqRiHMrnKl',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0XhcbuMPBRRcaqRiHMrnKl',
  'duration_ms': 244253,
  'time_signature': 4}]

---

### Neural Net

In [None]:
from tensorflow.keras.models import load_model


# Restore the network from the file written by model.save('Sequential_model.h5').
model = load_model('Sequential_model.h5')
print('Loaded model!')

In [None]:
import pandas as pd
import numpy
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import wandb
from wandb.keras import WandbCallback
from category_encoders import TargetEncoder
from category_encoders import OrdinalEncoder
from tensorflow import keras


# Fix numpy's random seed.
seed = 7
numpy.random.seed(seed)

# Important Hyperparameters
batch_size, epochs, optimizer = 100, 1, 'adam'

print('Building model...')
# All four layers are dense with sigmoid activations.
# NOTE(review): 13 inputs and 130663 outputs presumably correspond to the training
# columns and the number of tracks -- confirm against X_train / y_train.
model = Sequential([
    Dense(13, input_dim = 13, activation = 'sigmoid'),   # input
    Dense(28, activation = 'sigmoid'),                   # hidden
    Dense(56, activation = 'sigmoid'),                   # hidden
    Dense(130663, activation = 'sigmoid'),               # output
])
print('Model set.')

In [None]:
# Compile the network with the optimizer chosen in the hyperparameter section,
# tracking accuracy alongside the loss.
print('Compiling...')
model.compile(
    loss      = 'sparse_categorical_crossentropy',
    optimizer = optimizer,
    metrics   = ['accuracy'],
)

print('Finished.')

In [None]:
# Write the current model to Sequential_model.h5 so it can be restored with load_model.
model.save('Sequential_model.h5')
print('Saved model!')

In [None]:
from tensorflow.keras.models import load_model


# Restore the saved network and print its layer summary.
model = load_model('Sequential_model.h5')

model.summary()


# Evaluate silently and report the second metric (index 1) as a percentage.
# NOTE(review): X_test / y_test are not defined in any visible cell -- the
# train_test_split call is commented out -- confirm where they come from.
score = model.evaluate(X_test, y_test, verbose = 0)
print(f'{model.metrics_names[1]}: {score[1] * 100:.2f}')

In [None]:
# Run the network on X_test and display the raw outputs.
# NOTE(review): X_test is not defined in any visible cell -- confirm.
model.predict(X_test)

In [49]:
# Train with the configured epochs and batch size, without progress output, and keep
# the returned history object; validation data is currently switched off.
# NOTE(review): X_train / y_train are not defined in any visible cell -- confirm.
print('Building hist...')
history = model.fit(X_train, y_train
#                    ,validation_data = (X_test, y_test)
                   ,epochs          = epochs
                   ,batch_size      = batch_size
                   ,verbose         = False
                   )
print('Hist complete.')

model.summary()

Building hist...
Hist complete.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 13)                182       
_________________________________________________________________
dense_9 (Dense)              (None, 28)                392       
_________________________________________________________________
dense_10 (Dense)             (None, 56)                1624      
_________________________________________________________________
dense_11 (Dense)             (None, 130663)            7447791   
Total params: 7,449,989
Trainable params: 7,449,989
Non-trainable params: 0
_________________________________________________________________


---

In [None]:
import pandas as pd


df = pd.read_csv('http://www.zernach.com/wp-content/uploads/2020/02/SpotifyAudioFeaturesApril2019.csv')

# Maps each track id to a list of (similarity score, track id) pairs, best first.
results = {}

# The identifier column of this dataset is 'track_id'.
track_ids = df['track_id'].to_numpy()

# NOTE(review): `cosine_similarities` is not defined in any visible cell; it is assumed
# to be a square matrix whose row/column positions match the rows of `df` -- confirm.
for idx, track_id in enumerate(track_ids):
    # Positions of the 99 highest scores in this row, highest first.
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
    similar_items = [(cosine_similarities[idx][i], track_ids[i]) for i in similar_indices]

    # Drop the top entry, which is presumably the track itself -- confirm.
    results[track_id] = similar_items[1:]

print('done!')

In [None]:
def item(id):
    '''Return the text before the first ' - ' in the description of row `id`.

    Looks the row up in the global `ds` by its 'id' column and uses the first match.
    NOTE(review): `ds` is not defined in any visible cell -- confirm.
    '''
    matching_rows     = ds.loc[ds['id'] == id]
    first_description = matching_rows['description'].tolist()[0]
    return first_description.split(' - ')[0]

# Just reads the results out of the dictionary.
def recommend(item_id, num):
    '''Print the first `num` stored recommendations for `item_id`.

    Reads the precomputed (score, id) pairs from the global `results` dictionary
    and prints one line per recommendation; nothing is returned.
    '''
    heading = "".join(["Recommending ", str(num), " products similar to ", item(item_id), "..."])
    print(heading)
    print("-------")
    for pair in results[item_id][:num]:
        line = "".join(["Recommended: ", item(pair[1]), " (score:", str(pair[0]), ")"])
        print(line)

# Print five recommendations for the item with id 11.
# NOTE(review): this only works if `results` and `ds` are keyed by integer ids like 11
# -- confirm against the cell that fills `results`.
recommend(item_id = 11, num = 5)