![image](../images/gnod_2nd_iteration.jpg)

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from getpass import getpass
from tqdm.notebook import tqdm
import pandas as pd
import joblib
from time import sleep
from IPython.core.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

In [2]:
def spotipy_connection():
    """
    asks for the Spotify API client id and client secret through getpass prompts,
    returns both as strings in a (client id, client secret) tuple
    """
    labels = ("Insert Spotipy Client ID:", "Insert Spotipy Client Secret:")
    answers = []
    for label in labels:
        # the label is printed first, then getpass reads the value
        print(label)
        answers.append(str(getpass()))

    client_id, client_secret = answers
    return client_id, client_secret

In [3]:
# Ask for the Spotify API credentials; both values are read through getpass prompts.
c_id,c_s = spotipy_connection()

Insert Spotipy Client ID:
········
Insert Spotipy Client Secret:
········


In [4]:
# Spotify client shared by the query and recommender functions below,
# authenticated with the client id / secret collected in the previous cell.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(client_id=c_id, client_secret=c_s)
)

In [5]:
def first_input ():
    """
    keeps asking for a song name until the answer, once stripped of surrounding
    whitespace, has at least 2 characters; returns that stripped string
    """
    prompt = "Insert the name of a song: "
    while True:
        name = str(input(prompt)).strip()
        if len(name) >= 2:
            return name
        # every retry uses the reminder prompt instead of the opening one
        prompt = "please write at least 2 characters as a name: "

In [6]:
def matching_songs(inp,song,artists,songid):
    """
    finds the songs whose name contains inp (case-insensitive substring match)

    inp: string to look for
    song, artists, songid: pandas Series, expected to be aligned by position
    returns a list of (song, artist, id) tuples, one per matching row in the
    original order, or None when nothing matches
    """

    # position of inp inside each lower-cased name; -1 (or NaN for a missing
    # name) means no match, and both fail the >= 0 test below
    found_at = song.str.lower().str.find(inp.lower())

    # enumerate() yields positions, not index labels, so rows are read with
    # .iloc; a plain song[i] is a label lookup and breaks on any index that is
    # not exactly 0..n-1
    songs = [
        (song.iloc[pos], artists.iloc[pos], songid.iloc[pos])
        for pos, offset in enumerate(found_at)
        if offset >= 0
    ]

    # If there are no matches whatsoever we return None, otherwise the list
    return songs if songs else None
    



In [20]:
def select_song(songs):
    """
    shows the candidate songs and asks the user to pick one of them

    songs: list of (song, artist, id) tuples
    returns the id (third element) of the chosen tuple, or False when the user
    answers N. when the id is a string it is treated as a Spotify track id and
    an embedded player is displayed for it.
    """

    if len(songs)==1:
        print("\nDid you mean this song?")
    else:
        print("\nDid you mean any of these?")

    # displaying the options and ask for selection
    for i,song in enumerate(songs):
        print("{}-> {} | {}".format(str(i),song[0],song[1]))

    sel = input("If yes, choose song number. if not, write N: ").strip()

    while sel.lower()!="n":
        # only the conversion and the lookup are guarded, so an error raised
        # while printing or displaying is not mistaken for a bad selection
        try:
            number = int(sel)
            # negative numbers are valid python indexes (-1 is the last item)
            # but were never offered as an option: treat them as out of range
            if number < 0:
                raise IndexError(number)
            chosen = songs[number]
        except ValueError:
            sel = input("Sorry, I didn't understand! Choose number if yes, N if not.").strip()
            continue
        except IndexError:
            sel = input("Please choose one of the songs or write N if not: ").strip()
            continue

        sleep(.5)
        print("You chose:\n{} | {} ".format(chosen[0],chosen[1]))
        # embed only string ids; integer ids (the chart passes its rank column)
        # are not track ids, and a "not int" test lets numpy integers through
        # because they are not instances of int
        if isinstance(chosen[2],str):
            display(HTML('<iframe src="https://open.spotify.com/embed/track/{}" width="320" height="80" frameborder="0" allowtransparency="true" allow="encrypted-media"></iframe>'.format(chosen[2])))
        return chosen[2]

    return False

In [8]:
def hot_songs(inp, chart_path="../data/top-100-songs-chart.csv", n_songs=10):
    """
    looks for inp in the hot songs chart and, if the user confirms one of the
    matches, recommends other songs from the same chart

    inp: song name typed by the user
    chart_path: csv file with at least the columns song, artist and rank
    n_songs: maximum number of songs to return
    returns inp unchanged when there is no match or the user rejects every
    match; otherwise a dataframe with columns name, artists, id holding a random
    sample of up to n_songs chart rows whose rank differs from the chosen one
    """

    hot_100 = pd.read_csv(chart_path, index_col=0)

    matching = matching_songs(inp,hot_100['song'],hot_100['artist'],hot_100['rank'])
    if matching is None:
        return inp

    song_ind = select_song(matching)
    # select_song answers False when no option was accepted; identity is used
    # because "== False" is also true for an id of 0
    if song_ind is False:
        return inp

    # .loc + rename() build a new frame instead of renaming a filtered slice in place
    others = hot_100.loc[hot_100['rank']!=song_ind, ['song','artist','rank']]
    others = others.rename(columns = {'song': 'name', 'artist': 'artists','rank':'id'})

    # never ask for more rows than are left, sample() raises otherwise
    return others.sample(min(n_songs, len(others)))
        

In [9]:
def spotify_query(inp,offs = 0):
    """
    searches the spotify api for tracks matching inp (10 results, starting at offset offs)
    returns a list of (song name, artist names joined by ", ", track id) tuples
    """
    response = sp.search(str(inp), limit=10, type='track', offset=offs)

    def as_tuple(track):
        # a track can have several artists, they are flattened into one string
        artist_names = (artist['name'] for artist in track['artists'])
        return track['name'], ", ".join(artist_names), track['id']

    return [as_tuple(track) for track in response['tracks']['items']]
    
    

In [10]:
def spotify_inp(inp):
    """
    queries spotify for inp and lets the user pick one of the results
    returns whatever select_song returns for that result list
    """
    # This can be improved to roll through several pages of the query, or by asking for a new input
    return select_song(spotify_query(inp))
   

In [11]:
def spotify_recommender(song_id):
    """
    recommends songs that fall in the same cluster as the given spotify track

    song_id: spotify track id
    returns a dataframe with columns name, artists, id holding a random sample
    of up to 10 songs from the predicted cluster
    raises ValueError when spotify returns no audio features for song_id
    """
    #loading the model and the cluster-song list
    # NOTE(review): joblib.load unpickles these files, so they must come from a trusted source
    scaler = joblib.load('../data/model/std_scaler.bin')
    model = joblib.load('../data/model/kmeans_model.sav')
    cluster_id = pd.read_csv('../data/model/cluster-id-dataframe.csv',index_col = 0)

    #retrieve the features from api
    response = sp.audio_features(song_id)
    song_feats = response[0] if response else None
    # without this check a missing entry fails below with an opaque TypeError
    if song_feats is None:
        raise ValueError("No audio features available for track: {}".format(song_id))

    features = ['danceability', 'energy', 'key', 'loudness', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence']
    result = pd.DataFrame({key:song_feats[key] for key in features},index = [0])

    #we scale it with our scaler
    scaled_feat = scaler.transform(result)

    #we predict to which cluster it belongs
    clust_pred = model.predict(scaled_feat)[0]

    #and create a sample of up to 10 songs from the same cluster;
    #sample() raises when asked for more rows than the cluster holds
    songs_category = cluster_id[cluster_id['cluster']==clust_pred]
    sample_size = min(10, len(songs_category))
    recomm = songs_category.sample(sample_size)[['name','artists','id']]

    return recomm

In [12]:
def print_recommendation(recomm):
    """
    prints the recommended songs one at a time, asking before each extra one

    recomm: dataframe with the columns name, artists and id
    returns False as soon as the user answers N to "one more suggestion?",
    True when every song has been shown. songs whose id is a string get an
    embedded spotify player.
    """

    total = len(recomm['id'])
    rows = zip(recomm['name'], recomm['artists'], recomm['id'])

    for i,(song,artist,song_id) in enumerate(rows):
        sleep(1)
        print("\nYour Recommendation:\n{} | {} ".format(song,artist))
        # only string ids are embedded; integer ids cannot be played
        if isinstance(song_id,str):
            display(HTML('<iframe src="https://open.spotify.com/embed/track/{}" width="320" height="80" frameborder="0" allowtransparency="true" allow="encrypted-media"></iframe>'.format(song_id)))

        sleep(2)
        # nothing left to offer after the last song, so no question is asked
        if i != total-1:
            a = input("Would you like one more suggestion? Y/N: ")
            # stripped like the answers in select_song, so "N " also stops
            if a.strip().lower() == "n":
                print("\n WELL, YOU'RE WELCOME :-|")
                return False
    return True


In [15]:
def final_song_recommender():
    """
    runs one interactive recommendation session: asks for a song name, looks it
    up in the hot songs chart first and on spotify otherwise, then prints the
    recommendations. returns nothing.
    """

    # we take the first input
    inp = first_input()
    # check if a hot song
    recomm = hot_songs(inp)
    # if its not a hot song, itll return the string inputted, and will check on spotify
    if isinstance(recomm,str):
        # get the input, return the song ID
        song_id = spotify_inp(inp)
        # a False id means the user rejected every spotify match; stop here
        # instead of sending it to the api and crashing
        if song_id is False:
            print("\nSorry, I couldn't find the song you were looking for.")
            return
        # retrieve the features, predict the cluster, get a list of songs that match it
        recomm = spotify_recommender(song_id)
    #prints first element of the output, and keeps printing available results up to 10
    print_recommendation(recomm)
    
    
    
    

In [None]:
# Start the interactive session: asks for a song name and prints recommendations one at a time.
final_song_recommender()

Insert the name of a song: da

Did you mean any of these?
0-> Time Today | Moneybagg Yo
1-> Dakiti | Bad Bunny & Jhay Cortez
2-> Good Days | SZA
3-> Damage | H.E.R.
4-> 4 Da Gang | 42 Dugg & Roddy Ricch
5-> Clear Da Air | Moneybagg Yo
If yes, choose song number. if not, write N: 3
You chose:
Damage | H.E.R. 



Your Recommendation:
My Head And My Heart | Ava Max 
