# Song Recommendations App

## Installation, Libraries and Web Scraping

In [1]:
!pip install spotipy



In [2]:
import time
import numpy as np
import re
import pandas as pd
import requests
from bs4 import BeautifulSoup
from config import *
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
# Build the Spotify client used by every API call below.
# Client_ID / Client_Secret are presumably provided by the star-import of `config` — confirm.
spotify_credentials = SpotifyClientCredentials(
    client_id=Client_ID,
    client_secret=Client_Secret,
)
sp = spotipy.Spotify(auth_manager=spotify_credentials)

In [4]:
# Download the Billboard Hot 100 page and parse it so it can be scraped below.
url = "https://www.billboard.com/charts/hot-100"
download = requests.get(url, timeout=30)  # never hang forever on a stalled connection
download.raise_for_status()  # fail loudly instead of parsing an error page
top_100 = BeautifulSoup(download.text, "html.parser")

In [5]:
# After inspecting the website on Chrome, found the information we're looking at under the tag: span > chart-element__information
# Only preview the first few matches: showing every chart entry floods the cell output.
top_100.select("span.chart-element__information")[:3]

[<span class="chart-element__information">
 <span class="chart-element__information__song text--truncate color--primary">Montero (Call Me By Your Name)</span>
 <span class="chart-element__information__artist text--truncate color--secondary">Lil Nas X</span>
 <span class="chart-element__information__delta color--secondary">
 <span class="chart-element__information__delta__text text--default">-</span>
 <span class="chart-element__information__delta__text text--last">- Last Week</span>
 <span class="chart-element__information__delta__text text--peak">1 Peak Rank</span>
 <span class="chart-element__information__delta__text text--week">1 Weeks on Chart</span>
 </span>
 </span>,
 <span class="chart-element__information">
 <span class="chart-element__information__song text--truncate color--primary">Peaches</span>
 <span class="chart-element__information__artist text--truncate color--secondary">Justin Bieber Featuring Daniel Caesar &amp; Giveon</span>
 <span class="chart-element__information__

### Load the Pickled K-Means Model

In [6]:
import pickle

# Load the object stored in kmeans_features.pickle; it is used further down through its .predict() method.
# NOTE(review): pickle.load can execute arbitrary code — only load pickle files you created yourself.
# The context manager closes the file handle even if unpickling fails.
with open("kmeans_features.pickle", "rb") as pickle_in:
    features_kmean = pickle.load(pickle_in)

## Creating DataFrames

In [7]:
# Creating a list of the artists sorted by 1st to last
top_100_artist = [elem.get_text() for elem in top_100.select("span.chart-element__information__artist")]

# Creating a list of the songs sorted by 1st to last (relative to list of artists)
top_100_song = [elem.get_text() for elem in top_100.select("span.chart-element__information__song")]

# Stop early with a clear message when the selectors matched nothing or the two
# lists do not line up (e.g. the page layout changed).
if not top_100_song or len(top_100_song) != len(top_100_artist):
    raise ValueError(
        "Scraping the chart returned {} songs and {} artists; "
        "check the CSS selectors against the current page.".format(len(top_100_song), len(top_100_artist))
    )

# Creating a list of the rankings, derived from the number of scraped entries
# so the frame can still be built when the page lists fewer than 100 rows.
top_100_rank = list(range(1, len(top_100_song) + 1))

top_100_DF = pd.DataFrame({"Rank": top_100_rank, "Artist": top_100_artist, "Song": top_100_song})
top_100_DF

Unnamed: 0,Rank,Artist,Song
0,1,Lil Nas X,Montero (Call Me By Your Name)
1,2,Justin Bieber Featuring Daniel Caesar & Giveon,Peaches
2,3,Silk Sonic (Bruno Mars & Anderson .Paak),Leave The Door Open
3,4,Cardi B,Up
4,5,Olivia Rodrigo,Drivers License
...,...,...,...
95,96,Rod Wave,Shock Da World
96,97,VEDO,You Got It
97,98,Rod Wave,Sneaky Links
98,99,Dylan Scott,Nobody


In [8]:
def get_playlist_tracks(playlist_id):
    '''
    Return every track item of the given Spotify playlist.

    The first page is requested with sp.user_playlist_tracks; as long as a page
    carries a "next" link, the following page is fetched with sp.next and its
    items are added to the result.
    '''
    page = sp.user_playlist_tracks("spotify", playlist_id)
    collected = page['items']
    while True:
        if not page['next']:
            return collected
        page = sp.next(page)
        collected += page['items']

#------------------------------------------------------------


def get_artists_from_track(track):
    '''
    Return the names of all artists listed under track["artists"], in the order given.
    '''
    names = []
    for artist in track["artists"]:
        names.append(artist["name"])
    return names


#------------------------------------------------------------


def get_artists_from_playlist(playlist_id):
    '''
    Return the first listed artist of every track in a playlist.

    Tracks are fetched with get_playlist_tracks and the artist names of each
    one are read with get_artists_from_track; only the first name is kept.
    '''
    first_artists = []
    for entry in get_playlist_tracks(playlist_id):
        credited = get_artists_from_track(entry["track"])
        first_artists.append(credited[0])
    return first_artists


#------------------------------------------------------------


def get_audio_features_from_uri(uri_list, batch_size=100):
    """
    Return the audio features of tracks, requested from Spotify in batches.

    uri_list:   iterable of track URIs.
    batch_size: number of URIs sent per sp.audio_features call. The default of
                100 is the per-request limit stated by the original author.

    Returns a list with one entry per request, each entry being whatever
    sp.audio_features returned for that batch. Flatten it to get one item per track.

    No request is made for an empty batch, so an empty input (or one whose
    length is an exact multiple of batch_size) never triggers a call without ids.

    Raises ValueError if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    uris = list(uri_list)  # accept any iterable, not only lists
    return [
        sp.audio_features(uris[start:start + batch_size])
        for start in range(0, len(uris), batch_size)
    ]

In [9]:
### Using the functions above, this section gets track info from multiple playlists into lists.

spoti_playlists = ["5wD3jt28mHg3V6HWjOKPfY", "1fcn9HWvodDldJ5eCH4EQO", "0rRmb79AWcRCcfB69qt3ds", "7IiVIXvEEYSio3MGu2tFpP", "3EtliZeGzwZqb8nr7r5Zp6", "1IF4SWhozHVEwidHhLWwbN", "1638KZlvcvyyEJ15S8erge", "2ClhJXe7qlZSFYzqjvZnPT", "28wnlqwIjjbc0wbG1074Dx", "4jVvhTA6Xf56C1uBR48HVF", "0MG9jIyagfVWDLhShEhvbg", "0T1VStDOQS8zKsdtslT4CQ", "0LnqdeU0cARj8S5H0KD5aS", "7nPmEwg1MGaYMyk3t0iRfl", "5Urzf8ruqboijSr6Q3k7Oq", "5iO4LyZAjwCBeTmI4a1hWb", "2xjSII47MmlFhEROR1MpHB", "5dBxORRX097LGt0sdV1nH7", "2qCfbEwciOfMbj3tg3kLnX", "7K5N0tc6x3fuIwJduSUwBG", "7E5KftDmvrx3q8y80bTsXY", "0An7X1arwAAalaVPSU8ee9", "37i9dQZF1DXbS5WTN5nKF7", "1DjoPXKR3GfAaFNnZ9dhpz", "7sBm2kJcW3NlwyJsFOdQ4y", "2V2LaNOiAZYIVTEnTCiuFx", "0lbSm7P3cJoRtpEsmv0ffO", "6FKDzNYZ8IW1pvYVF4zUN2", "37i9dQZF1DWTJ7xPn4vNaz", "6ftISEMV7Gy3couydcyBVa", "6W0Zrtc9pUiXk32Fb0ZMyU", "37i9dQZF1DWXRqgorJj26U", "37i9dQZF1DX3oM43CtKnRV", "0FAb3s3yJArWnikZbEOO9p", "5fWeZI5FLdUkowl4qaglPi", "482XK9cqY3NjMiLZN2aXNt", "13YDWMOgGy2xUwhEDxc9NE", "1DpzmhqivDeSF5QBuzpvER", "2xcYmkUS2oLmIUCFmXWIR4", "37i9dQZF1DX04mASjTsvf0", "1rLuENR0J3kQ2DNXJ0bp6x", "70M2GdJysbCKToAuZkWw7a", "7JJy3JRnYGhJeHQl5XeptL"]
song_artists = []
song_names = []
song_uris = []

# Download every playlist once and fill the three lists in the same pass, so
# artist, name and URI at a given position always describe the same track.
for playlist_id in spoti_playlists:
    for item in get_playlist_tracks(playlist_id):
        track = item["track"]
        # NOTE(review): playlist entries can presumably come back without track
        # data (e.g. removed tracks) — such entries are skipped; confirm against the API.
        track_artists = get_artists_from_track(track) if track else []
        if not track_artists:
            continue
        song_artists.append(track_artists[0])  # only the first listed artist is kept
        song_names.append(track["name"])
        song_uris.append(track["uri"])




In [10]:
# This section uses the lists created above and builds 2 dataframes that get merged together.

song_dict = {"Song": song_names, "Artist": song_artists, "URI": song_uris}
songs_df = (
    pd.DataFrame.from_dict(song_dict)  # Defining song DF (includes the data from song_dict)
    .drop_duplicates(['Song', 'Artist'], keep='last')  # Removes duplicated tracks that have different URIs
    .reset_index(drop=True)
)

clean_uris = songs_df["URI"].tolist()

# The helper returns one list per request; flatten it to one entry per URI.
song_features = [
    track_features
    for batch in get_audio_features_from_uri(clean_uris)
    for track_features in batch
]

# The merge below is positional, so both sides must describe the same tracks in the same order.
if len(song_features) != len(clean_uris):
    raise ValueError(
        "Expected {} audio-feature entries, got {}.".format(len(clean_uris), len(song_features))
    )

# NOTE(review): the API presumably answers with None for tracks it has no features
# for — those songs are dropped from the merged frame; confirm against the API docs.
has_features = [track_features is not None for track_features in song_features]

# Defining features DF in preparation for merging with song df
features_df = pd.DataFrame([track_features for track_features in song_features if track_features is not None])

Spotify_DF = (
    songs_df[has_features]
    .reset_index(drop=True)  # realign positions with features_df after dropping songs
    .merge(features_df, left_index=True, right_index=True)
    .drop(columns=["uri"])
)

Spotify_DF = Spotify_DF.drop_duplicates()  # DROP duplicates to make sure data is clean.

In [11]:
# Persist both frames as CSV files (without the index column).
for frame_to_save, csv_destination in (
    (features_df, "/Users/FH/Desktop/Ironhack/Github/Project-week-3-Recommendations/Data/audio_features.csv"),
    (Spotify_DF, "/Users/FH/Desktop/Ironhack/Github/Project-week-3-Recommendations/Data/spotify_df.csv"),
):
    frame_to_save.to_csv(path_or_buf=csv_destination, index=False)

In [12]:
# features_kmean comes from the pickle file; features is the saved audio-features
# table reduced to the columns that are passed to features_kmean.predict.
columns_not_used_for_clustering = ["type", "id", "uri", "track_href", "analysis_url", "duration_ms", "time_signature"]
features = pd.read_csv(
    "/Users/FH/Desktop/Ironhack/Github/Project-week-3-Recommendations/Data/audio_features.csv"
).drop(columns=columns_not_used_for_clustering)

# One predicted cluster label per row of features
clusters = features_kmean.predict(features)

In [13]:
# Attach the predicted cluster label of every song as a "Cluster" column.
Spotify_DF = Spotify_DF.assign(Cluster=clusters)
Spotify_DF

Unnamed: 0,Song,Artist,URI,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,track_href,analysis_url,duration_ms,time_signature,Cluster
0,Anywhere,Rita Ora,spotify:track:7EI6Iki24tBHAMxtb4xQN2,0.628,0.797,11,-3.953,0,0.0596,0.0364,...,0.1040,0.321,106.930,audio_features,7EI6Iki24tBHAMxtb4xQN2,https://api.spotify.com/v1/tracks/7EI6Iki24tBH...,https://api.spotify.com/v1/audio-analysis/7EI6...,215064,4,2
1,Book Of Love,Felix Jaehn,spotify:track:47NTskHaXv46jp21vFj8tD,0.656,0.835,9,-4.915,0,0.0278,0.1660,...,0.2180,0.455,125.023,audio_features,47NTskHaXv46jp21vFj8tD,https://api.spotify.com/v1/tracks/47NTskHaXv46...,https://api.spotify.com/v1/audio-analysis/47NT...,198439,4,0
2,Thunderclouds,Sia,spotify:track:5a8RPWgKSmcGBGcffmIrUi,0.691,0.716,7,-5.985,1,0.0351,0.0085,...,0.2630,0.507,112.035,audio_features,5a8RPWgKSmcGBGcffmIrUi,https://api.spotify.com/v1/tracks/5a8RPWgKSmcG...,https://api.spotify.com/v1/audio-analysis/5a8R...,187027,4,0
3,Ensemble,Kids United,spotify:track:0IB4sNc2voixWTSN5QMRAw,0.709,0.629,8,-5.865,1,0.0417,0.0986,...,0.1010,0.133,115.103,audio_features,0IB4sNc2voixWTSN5QMRAw,https://api.spotify.com/v1/tracks/0IB4sNc2voix...,https://api.spotify.com/v1/audio-analysis/0IB4...,186352,4,0
4,hate u love u,Olivia O'Brien,spotify:track:6ATgDc6e4sPn84hvJsAmPt,0.407,0.416,6,-8.915,0,0.1740,0.6560,...,0.0981,0.246,176.947,audio_features,6ATgDc6e4sPn84hvJsAmPt,https://api.spotify.com/v1/tracks/6ATgDc6e4sPn...,https://api.spotify.com/v1/audio-analysis/6ATg...,175467,4,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13936,Respect,Aretha Franklin,spotify:track:7s25THrKz86DM225dOYwnr,0.805,0.558,0,-5.226,1,0.0410,0.1640,...,0.0546,0.965,114.950,audio_features,7s25THrKz86DM225dOYwnr,https://api.spotify.com/v1/tracks/7s25THrKz86D...,https://api.spotify.com/v1/audio-analysis/7s25...,147600,4,0
13937,Chain of Fools,Aretha Franklin,spotify:track:7jVNFF0E9Zm04J8ezjLuNq,0.760,0.613,10,-9.035,1,0.0517,0.1810,...,0.0957,0.944,117.007,audio_features,7jVNFF0E9Zm04J8ezjLuNq,https://api.spotify.com/v1/tracks/7jVNFF0E9Zm0...,https://api.spotify.com/v1/audio-analysis/7jVN...,168013,4,0
13938,Think,Aretha Franklin,spotify:track:0jkBzn7J5jc889TYxwvXy6,0.690,0.750,6,-6.449,1,0.0442,0.3740,...,0.6030,0.905,110.102,audio_features,0jkBzn7J5jc889TYxwvXy6,https://api.spotify.com/v1/tracks/0jkBzn7J5jc8...,https://api.spotify.com/v1/audio-analysis/0jkB...,138720,4,2
13939,Best of My Love,The Emotions,spotify:track:2M2WJ7gBlcKNxdhyfPp9zY,0.784,0.713,0,-6.865,1,0.0339,0.4470,...,0.0904,0.970,115.443,audio_features,2M2WJ7gBlcKNxdhyfPp9zY,https://api.spotify.com/v1/tracks/2M2WJ7gBlcKN...,https://api.spotify.com/v1/audio-analysis/2M2W...,220560,4,0


In [14]:
# One frame per cluster label, each with a "Song by Artist" column that the
# recommender prints when it suggests a song.


def _songs_in_cluster(cluster_label):
    """Return the rows of Spotify_DF with the given Cluster label, plus a "Song by Artist" column."""
    subset = Spotify_DF[Spotify_DF['Cluster'] == cluster_label].reset_index()
    subset["Song by Artist"] = subset['Song'] + " by " + subset['Artist']
    return subset


Spotify_DF_C1, Spotify_DF_C2, Spotify_DF_C3 = (
    _songs_in_cluster(cluster_label) for cluster_label in (0, 1, 2)
)


## Recommender Code

In [15]:
def next_recommendation(x, y):
    '''
    Drop the current recommendation and print the next one.

    x = song list
    y = artist list (same order as x)

    Both lists are changed in place: their first element is removed, then the
    new first song and artist are printed. When nothing is left to recommend,
    a notice is printed instead of raising IndexError.

    Always returns None.
    '''
    # Remove the recommendation that was already shown (if there is one).
    if x:
        x.pop(0)
    if y:
        y.pop(0)

    if not x or not y:
        print("Sorry, we have no more recommendations left.")
        return None

    print("Here's another one: ", x[0], "by ", y[0])
    return None

In [16]:
def query_artists(user_query):
    '''
    Based on what the user searches, return the *unique* first-listed artists
    of the top 5 search results.

    The names keep the order of the search results, so repeated calls with the
    same results always return the same list. Results without any artist are skipped.
    '''
    search = sp.search(q=user_query, limit=5)
    top_5_results = search['tracks']["items"]
    first_artists = (
        result["artists"][0]['name'] for result in top_5_results if result["artists"]
    )
    # dict.fromkeys removes duplicates while keeping first-seen order (a set would not).
    return list(dict.fromkeys(first_artists))

In [29]:
song_list = top_100_DF["Song"].tolist() # List used to create the song recommendations
artist_list = top_100_DF["Artist"].tolist() # List used for printing the artist name relative to the song.

# Cluster label -> frame holding the songs of that cluster
cluster_frames = {0: Spotify_DF_C1, 1: Spotify_DF_C2, 2: Spotify_DF_C3}


def _print_cluster_recommendation(cluster_label):
    """Print one random "Song by Artist" of the given cluster; return False when there is nothing to pick from."""
    frame = cluster_frames.get(cluster_label)
    if frame is None or frame.empty:
        print("Sorry, we have no recommendation for this track.")
        return False
    print("Try this one out: ", ''.join(frame.sample()["Song by Artist"].tolist()))
    return True


while song_list != []:
    print("Welcome to Gnoosic! ")
    time.sleep(0.8)
    print("Give us a song and we'll recommend you something similar.")
    print("(Type 'quit' to leave.)")
    time.sleep(1.1)
    user_input = input("Please type your favourite song here: ").strip().lower().title()
    wanted = user_input.casefold()

    # Explicit way out, so the app does not have to be interrupted from the keyboard.
    if wanted in ("quit", "exit"):
        print("Adios Amigo")
        break

    # An empty query can neither be matched nor searched.
    if not wanted:
        print("Sorry we couldn't find your track, please try with another one")
        print()
        continue

    # Case-insensitive lookup in the chart: title-casing the input is not enough
    # for titles such as "What's Next" (str.title() turns it into "What'S Next").
    hot_index = next(
        (position for position, title in enumerate(song_list) if title.casefold() == wanted),
        None,
    )

    if hot_index is not None:
        print("Searching...")
        time.sleep(0.8)
        print("Finding songs similar to: ", song_list[hot_index], "by ", artist_list[hot_index])

        # Remove the requested song (and its artist) so it is not recommended back.
        artist_list.pop(hot_index)
        song_list.pop(hot_index)

        time.sleep(0.4)
        print()
        if not song_list:
            print("Sorry, we have no more recommendations left.")
            print("Adios Amigo")
            break

        print("Try this one out: ", song_list[0], "by ", artist_list[0])
        time.sleep(0.8)

        user_input2 = input("Write yes if you'd like another one, no to stop: ").strip().lower()
        # next_recommendation drops the current head and prints the next one, so
        # it is only called while at least two songs are left.
        while user_input2 == "yes" and len(song_list) > 1:
            print()
            next_recommendation(song_list, artist_list)
            user_input2 = input("Write yes if you'd like another one, no to stop: ").strip().lower()

        if user_input2 == "yes":
            print("Sorry, we have no more recommendations left.")
        print("Adios Amigo")
        break

    else:
        # Ask Spotify once and reuse the answer for every question below.
        top_5_artist_results = query_artists(user_input)

        chosen_artist = None
        for candidate in top_5_artist_results:
            print("is it by: ", candidate)
            artist_input = input("Type 'yes' if it is: ").strip().lower()
            if artist_input == "yes":
                chosen_artist = candidate
                break
            print()

        if chosen_artist is None:
            print("Sorry we couldn't find your track, please try with another one")
            print()
            continue

        print("searching for '", user_input,"' by: ", chosen_artist, "...")
        time.sleep(1.3)
        SR_URI = sp.search(q= user_input + " " + chosen_artist, limit=1)
        found_tracks = SR_URI['tracks']['items']
        track_features = sp.audio_features(found_tracks[0]['uri']) if found_tracks else None

        # Neither an empty search result nor missing audio features can be clustered.
        if not track_features or track_features[0] is None:
            print("Sorry we couldn't find your track, please try with another one")
            print()
            continue

        SR_features = pd.DataFrame(track_features)
        SR_features = SR_features.drop(columns = ["type", "id", "uri", "track_href", "analysis_url", "duration_ms", "time_signature"])
        SR_cluster = features_kmean.predict(SR_features)
        cluster_label = int(SR_cluster[0])

        if _print_cluster_recommendation(cluster_label):
            print()
            user_input3 = input("Would you like another one? ").strip().lower()
            while user_input3 == "yes":
                _print_cluster_recommendation(cluster_label)
                time.sleep(0.8)
                print()
                user_input3 = input("Would you like another one? ").strip().lower()

        # Done with this track: start over with a new song.
        time.sleep(0.5)
        print("Here we go again.. ")
        print()


Welcome to Gnoosic! 
Give us a song and we'll recommend you something similar.
Please type your favourite song here: beat it
is it by:  Michael Jackson
Type 'yes' if it is: no

is it by:  Fall Out Boy
Type 'yes' if it is: no

is it by:  The Beatles
Type 'yes' if it is: yes
searching for ' Beat It ' by:  The Beatles ...
Try this one out:  Sky Is the Limit by DJ Antoine

Would you like another one? yes
Try this one out:  Fire With Fire by Scissor Sisters



KeyboardInterrupt: Interrupted by user