# SpotiFinder


---

## Spotify request class

In [35]:
import sys
import json
import timeit
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv


class SpotiFinder():
    '''Small convenience wrapper around a ``spotipy.Spotify`` client.

    Typical use is ``spot = SpotiFinder(); spot.authenticate()`` followed by
    one of the lookup helpers.  Every helper expects ``authenticate`` to have
    been called first, because ``self.sp`` is ``None`` until then.

    Attributes:
        sp:       the ``spotipy.Spotify`` client, or ``None`` before
                  ``authenticate``.
        token:    the value returned by ``util.prompt_for_user_token``.
        username: the Spotify username the token was requested for.
    '''
    # Executed once, when the class body runs.  Presumably this loads the
    # Spotify client credentials from a local .env file -- confirm against
    # the project's .env.
    load_dotenv()

    # Values used by ``authenticate`` when the caller supplies none.
    DEFAULT_USERNAME    = 'a4kqw8b9sws8zwd5bfjdjy4zk'
    DEFAULT_SCOPE       = 'user-library-read'

    # Number of track ids sent per ``audio_features`` request.
    FEATURE_BATCH_SIZE  = 100

    def __init__(self):
        self.sp       = None
        self.token    = None
        self.username = None

    def authenticate(self, username = None, scope = None):
        '''Request a user token and build the Spotify client.

        The username needs to be the one shown in the top right of Spotify.
        There is no known way to obtain it without asking the user for it.

        Args:
            username: Spotify username; falls back to ``DEFAULT_USERNAME``.
            scope:    authorization scope; falls back to ``DEFAULT_SCOPE``.
        '''
        self.username = username or self.DEFAULT_USERNAME
        scope         = scope or self.DEFAULT_SCOPE
        self.token    = util.prompt_for_user_token(self.username, scope)
        self.sp       = spotipy.Spotify(auth = self.token)

    def get_saved_tracks(self, limit = 20):
        '''Return the ids of the current user's saved tracks.

        Only a single ``current_user_saved_tracks`` request is made, so at
        most ``limit`` ids are returned; no paging is performed.
        '''
        saved_tracks = self.sp.current_user_saved_tracks(limit = limit)
        return [item['track']['id'] for item in saved_tracks['items']]

    def track_list_features(self, track_id_list):
        '''Fetch audio features for the given track ids.

        The ids are requested in batches of ``FEATURE_BATCH_SIZE``.  Entries
        for which the API returns ``None`` are dropped, so the result can be
        shorter than ``track_id_list``.

        Returns:
            A list of feature dictionaries (empty when ``track_id_list`` is
            empty), for use with the nearest neighbors model.
        '''
        batchsize = self.FEATURE_BATCH_SIZE
        rows      = []

        for start in range(0, len(track_id_list), batchsize):
            batch           = track_id_list[start: start + batchsize]
            feature_results = self.sp.audio_features(batch)
            rows.extend(features
                        for features in feature_results
                        if features is not None)

        return rows

    def get_saved_playlists(self, limit = 50):
        '''Return ``{playlist name: [track ids]}`` for the user's playlists.

        One ``user_playlists`` request is made for ``self.username`` and one
        ``playlist`` request per playlist; no follow-up paging requests are
        made.  Playlists that share a name overwrite each other in the
        result.
        '''
        playlists      = self.sp.user_playlists(self.username, limit = limit)
        dict_playlists = {}

        for playlist in playlists['items']:
            details = self.sp.playlist(playlist['id'], fields = "tracks,next")
            # A fresh list per playlist: each name maps to its own tracks
            # only.  Items without a track object are skipped rather than
            # crashing (presumably removed/unavailable tracks -- confirm).
            dict_playlists[playlist['name']] = [
                item['track']['id']
                for item in details['tracks']['items']
                if item.get('track')
            ]

        return dict_playlists

    def track_lookup(self, track_ids):
        '''Return whatever ``self.sp.track`` returns for ``track_ids``.

        NOTE(review): despite the plural parameter name, the value is passed
        straight to ``sp.track``, which presumably expects a single track
        id/URI -- confirm against the spotipy documentation.
        '''
        return self.sp.track(track_ids)

### Manual Control

In [109]:
from dotenv import load_dotenv
import spotipy
import spotipy.util as util
# from spotipy.oauth2 import SpotifyClientCredentials



# Read the local .env file before any token is requested.
load_dotenv()

# Account and permission scope used for the manual session.
scope    = 'user-library-read'
username = 'a4kqw8b9sws8zwd5bfjdjy4zk'

# Obtain a token for that account, then build a client bound to it.
token = util.prompt_for_user_token(username, scope)
sp    = spotipy.Spotify(auth = token)

In [69]:
# Creates a json file of tracks
# with open('tracks.json', 'w') as outfile:
#     json.dump(tracks['items'][0]['track'], outfile)

---

## Nearest Neighbors Model

In [37]:
from sklearn.neighbors import NearestNeighbors
import pandas as pd


# Reference table of tracks and their audio features.
df = pd.read_csv('http://www.zernach.com/wp-content/uploads/2020/02/SpotifyAudioFeaturesApril2019.csv')

# Column identifying a track, and the columns the model is fit on.
target = 'track_id'
features = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'valence',
]

X = df[features]
y = df[target]

# Number of neighbours returned per query by default.
nearest = 10

nn = NearestNeighbors(n_neighbors = nearest, algorithm = 'kd_tree')
nn.fit(X, y)

NearestNeighbors(algorithm='kd_tree', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                 radius=1.0)

In [39]:
# Save
import joblib


# Persist the fitted nearest-neighbours model (`nn`) next to the notebook so
# later sessions can reload it instead of refitting.
filename = 'Nearest_Neighbors.sav'
joblib.dump(nn, filename)
print('Model saved!')

Model saved!


In [3]:
# Load
import joblib


# Restore the nearest-neighbours model written by the save cell.
# NOTE(review): joblib.load unpickles the file, so only load a
# 'Nearest_Neighbors.sav' that this project produced itself.
filename = 'Nearest_Neighbors.sav'
nn       = joblib.load(filename)
print('Model Loaded.')

Model Loaded.


### Model function predictor 9001

In [71]:
def nn_predict(track_id_list, output = 'df'):
    '''Recommend reference tracks that are nearest to a Spotify track.

    Audio features for ``track_id_list`` are fetched through ``SpotiFinder``
    and passed to the module-level nearest-neighbours model ``nn``.  The
    neighbour indices of the FIRST track with available features are used to
    select rows from the reference CSV; the remaining tracks are queried but
    their neighbours are not returned.

    Assumes ``nn`` was fit on this same CSV, in the same row order, so that
    the indices it returns are valid row positions -- confirm whenever the
    model is re-trained.

    Args:
        track_id_list: list of Spotify track ids.
        output: ``'df'`` for a DataFrame of the neighbouring rows,
            ``'track_ids'`` for just their ``track_id`` column, or
            ``'json'`` for the rows as a plain dict parsed from
            ``DataFrame.to_json()``.

    Returns:
        The neighbours in the requested format.  When no audio features are
        available (for example an empty ``track_id_list``) the result is
        empty rather than an error.

    Raises:
        ValueError: if ``output`` is not one of the supported values.
    '''
    import pandas as pd
    import json

    # Reject typos before doing any network work; previously an unknown
    # value fell through every branch and silently returned None.
    valid_outputs = ('df', 'track_ids', 'json')
    if output not in valid_outputs:
        raise ValueError(
            f'output must be one of {valid_outputs}, got {output!r}'
        )

    df       = pd.read_csv('http://www.zernach.com/wp-content/uploads/2020/02/SpotifyAudioFeaturesApril2019.csv')

    target   = 'track_id'
    features = ['acousticness'
               ,'danceability'
               ,'energy'
               ,'instrumentalness'
               ,'key'
               ,'liveness'
               ,'loudness'
               ,'mode'
               ,'speechiness'
               ,'tempo'
               ,'valence'
               ]

    # Needs to be able to grab track info from spotify
    spot = SpotiFinder()
    spot.authenticate()

    track_list_unclean = spot.track_list_features(track_id_list)

    if track_list_unclean:
        df_tracks_features = pd.DataFrame(track_list_unclean)[features]
        # kneighbors returns (distances, indices); keep the index row that
        # belongs to the first queried track.
        predictions        = nn.kneighbors(df_tracks_features)[1][0]
    else:
        # Nothing to query: selecting `features` from an empty frame would
        # raise KeyError, so fall back to an empty selection instead.
        predictions        = []

    neighbors = df.iloc[predictions]

    if output == 'df':
        return neighbors

    if output == 'track_ids':
        return neighbors[target]

    return json.loads(neighbors.to_json())
    

In [73]:
# Build a client and authenticate it before making any request.
spot   = SpotiFinder()
spot.authenticate()

# Ids of the user's saved tracks (get_saved_tracks defaults to limit = 20).
# The bare name on the last line displays the list as the cell output.
track_list = spot.get_saved_tracks()
track_list

['5soMJpcVhSrGrB4prvPL6P',
 '4COR2ZPEyUn0lsbAouRWxA',
 '0XhcbuMPBRRcaqRiHMrnKl',
 '5MAPt0beSbEQdPM5SfJwyl',
 '5LfS278Zsmem9pmxmBXgix',
 '5ZGtjzyg3F6XftEyllHDnC',
 '6gHsenqqwzj67gp1OzCIi2',
 '3KtFA8wHjyg44eVtiI4QqE',
 '2xoG88poPOqbdDx89hEkLk',
 '36iHXEANQhbT3VShWXlSKY',
 '4wNOTaNKhUNlw4HvAlrlq1',
 '25VIAdpuCXpUpV1aHKvKoy',
 '5YScXJKtefsgdskIy60N7A',
 '1gF5miexkFs5uZj04Y5t9M',
 '7JsvyaIQfBSX4yVGs6cqVA',
 '4eLQPgkIwZI6OlllntlLdF',
 '1GwdUrz7DujSedNxnzVfqI',
 '68C7Q9IW70v5uaXSXyWVm3',
 '3oVSsJXNADnYAnZWiEWFJH',
 '1CmUZGtH29Kx36C1Hleqlz']

In [77]:
# Show the neighbouring reference rows for the saved tracks as a DataFrame.
nn_predict(track_list, 'df')

Unnamed: 0,artist_name,track_id,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence,popularity
71478,Of Feather And Bone,74y93ryZxdsfTWlvGAcDb0,Lust for Torment,9e-06,0.253,282600,0.97,0.849,9,0.0358,-8.78,1,0.107,107.999,4,0.0913,17
119224,ODESZA,2DbQxHHEMJjno3WVttevLl,Show Me,0.0026,0.539,218549,0.596,0.534,9,0.0881,-8.389,1,0.0448,107.736,4,0.0467,50
39192,Mr. Fingers,3fOva2jdaAddyxJ4SeBBy6,Electron,0.046,0.594,369139,0.904,0.768,8,0.112,-8.91,1,0.0368,108.0,4,0.462,23
128945,Jeremih,3bTSIHFVorXxkPKykuABt6,Imitate,0.158,0.879,206733,0.373,7.4e-05,9,0.0767,-9.174,1,0.0629,108.031,4,0.524,60
112850,MihTy,3bTSIHFVorXxkPKykuABt6,Imitate,0.158,0.879,206733,0.373,7.4e-05,9,0.0767,-9.174,1,0.0629,108.031,4,0.524,60
48486,Stylo G,4PtLrr54ZnP7GFXixtFxLj,Under Construction,0.0133,0.659,165437,0.651,1.9e-05,9,0.134,-8.35,1,0.173,107.176,4,0.401,32
86277,Corridos Perrones,5h4f8O9GPAFC7nuyWGykJw,La Merca,0.425,0.757,211627,0.669,0.0,9,0.046,-9.233,1,0.056,107.796,4,0.821,7
39064,Bryan J,6QEG7ejj2pagY6gnpG8oWe,You Tonight,0.00142,0.619,180036,0.745,2.1e-05,9,0.102,-8.499,1,0.0375,106.961,4,0.254,4
25576,Reflective Ripples,7J5zKRUtW4rStWHPtdciAd,Hambone Jonez,0.000108,0.433,191295,0.427,0.885,9,0.384,-9.871,1,0.0758,108.267,5,0.34,0
24110,Zach Zimmerman,5Rs4YPehch2feK2e10itTg,She Is My Home,0.738,0.807,147750,0.359,0.0,9,0.119,-8.902,1,0.0381,108.043,4,0.668,21


---

### Neural Net Model (Failed)

In [None]:
# Keras models have no ``load`` method, and ``model`` is not defined yet when
# this cell runs first; loading goes through ``load_model``, which returns
# the restored model.  The message now says what the cell actually does.
from tensorflow.keras.models import load_model

model = load_model('Sequential_model.h5')
print('Loaded model!')

In [None]:
import pandas as pd
import numpy
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import wandb
from wandb.keras import WandbCallback
from category_encoders import TargetEncoder
from category_encoders import OrdinalEncoder
from tensorflow import keras


# Seed NumPy's global random generator.
# NOTE(review): this does not seed TensorFlow's own generator.
seed = 7
numpy.random.seed(seed)

# Important Hyperparameters
batch_size = 100
epochs     = 1
optimizer  = 'adam'

print('Building model...')
model = Sequential()

# Input
# Expects 13 input columns -- presumably the width of the training matrix;
# TODO confirm, since the nearest-neighbours cells use 11 feature columns.
model.add(Dense(13, input_dim = 13, activation = 'sigmoid'))

# Hidden
model.add(Dense(28   ,activation = 'sigmoid'))
model.add(Dense(56   ,activation = 'sigmoid'))

# Output
# One unit per class (presumably one per distinct track id -- TODO confirm).
# The model is compiled with sparse_categorical_crossentropy, which treats
# the output as a probability distribution over mutually exclusive classes,
# so the layer uses softmax; independent sigmoids do not sum to one.
model.add(Dense(130663, activation = 'softmax'))
print('Model set.')

In [None]:
# Compile the model with the loss, optimizer and metric used for training.
print('Compiling...')
model.compile(
    loss      = 'sparse_categorical_crossentropy',
    optimizer = optimizer,
    metrics   = ['accuracy'],
)

print('Finished.')

In [None]:
# Write the current `model` to 'Sequential_model.h5' in the working directory.
model.save('Sequential_model.h5')
print('Saved model!')

In [None]:
from tensorflow.keras.models import load_model


# Restore the model saved by the earlier cell and print its layer summary.
model = load_model('Sequential_model.h5')

model.summary()


# Evaluate on the held-out split and report the second metric as a percentage.
# NOTE(review): X_test / y_test are not defined in any visible cell; they
# must be created (e.g. by a train/test split) before this cell can run.
score = model.evaluate(X_test, y_test, verbose = 0)
print(f'{model.metrics_names[1]}: {score[1] * 100:.2f}')

In [None]:
# Display the model's raw predictions for X_test as the cell output.
# NOTE(review): X_test is not defined in any visible cell.
model.predict(X_test)

In [49]:
# Train silently with the hyperparameters chosen above and keep the history.
print('Building hist...')
history = model.fit(
    X_train,
    y_train,
    # validation_data = (X_test, y_test),
    epochs     = epochs,
    batch_size = batch_size,
    verbose    = False,
)
print('Hist complete.')

# Print the layer-by-layer summary of the trained model.
model.summary()

Building hist...
Hist complete.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 13)                182       
_________________________________________________________________
dense_9 (Dense)              (None, 28)                392       
_________________________________________________________________
dense_10 (Dense)             (None, 56)                1624      
_________________________________________________________________
dense_11 (Dense)             (None, 130663)            7447791   
Total params: 7,449,989
Trainable params: 7,449,989
Non-trainable params: 0
_________________________________________________________________
