# Building our song recommender

In [1]:
from bs4 import BeautifulSoup
import pickle
import requests
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import pairwise_distances_argmin_min
import warnings
warnings.filterwarnings('ignore')

In [2]:
# first to get the current billboard100

r = requests.get('https://www.billboard.com/charts/hot-100/').content
soup = BeautifulSoup(r, 'html.parser')
# Using the code we developed to get the billboard100 in the web scraping lab
artists = soup.find_all("span", attrs={"class": "c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only"})
artists

N1_artist = soup.find_all('span', attrs = {'class' :"c-label a-no-trucate a-font-primary-s lrv-u-font-size-14@mobile-max u-line-height-normal@mobile-max u-letter-spacing-0021 lrv-u-display-block a-truncate-ellipsis-2line u-max-width-330 u-max-width-230@tablet-only u-font-size-20@tablet"})
artists = N1_artist + artists

artist_names = []
for artist in artists:
    artist_names.append(artist.get_text(strip = True))
    
    
songs = soup.find_all('h3', attrs={'class':"c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 lrv-u-font-size-18@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-330 u-max-width-230@tablet-only"})

N1_song = soup.find_all('h3', attrs={'class' :"c-title a-no-trucate a-font-primary-bold-s u-letter-spacing-0021 u-font-size-23@tablet lrv-u-font-size-16 u-line-height-125 u-line-height-normal@mobile-max a-truncate-ellipsis u-max-width-245 u-max-width-230@tablet-only u-letter-spacing-0028@tablet" })
songs = N1_song + songs

song_names = []
for song in songs:
    song_names.append(song.get_text(strip = True))
    
    
billboard100 = pd.DataFrame(zip(artist_names,song_names), columns = ['artist', 'song_name'])
billboard100['number'] = range(1, 101)
billboard100.index = billboard100.index + 1
billboard100

Unnamed: 0,artist,song_name,number
1,Doja Cat,Paint The Town Red,1
2,SZA,Snooze,2
3,Luke Combs,Fast Car,3
4,Taylor Swift,Cruel Summer,4
5,Zach Bryan Featuring Kacey Musgraves,I Remember Everything,5
...,...,...,...
96,Tim McGraw,Standing Room Only,96
97,Rod Wave,Checkmate,97
98,Dylan Scott,Can't Have Mine,98
99,Victoria Monet,On My Mama,99


In [3]:
#Load in our ML model

with open('spotify_files/kmeans_model.pkl', 'rb') as model_file:
    kmeans = pickle.load(model_file)
    
kmeans

In [4]:
#Initialize spotipy with user credentials to use for input songs
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id = 'fb35e1e6b7c54dc6b625996ae72f9f81',
                                                          client_secret = '72eeda8532044330ba8a17eacdea3e62'))

In [5]:
spot_df = pd.read_csv('spotify_files/kmeans_spot.csv')
spot_df.drop('Unnamed: 0', axis = 1, inplace = True)
spot_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,artist,song_name,cluster
0,0.679,0.557,0,-7.423,1,0.0281,0.090400,0.032400,0.0866,0.405,85.152,Eve,What It's Like,1
1,0.584,0.911,5,-3.515,0,0.0900,0.011900,0.000000,0.1190,0.606,92.502,Eve,Kaikai Kitan,4
2,0.480,0.482,7,-11.450,0,0.0362,0.250000,0.000289,0.3450,0.684,128.160,Eve,Hey Lover,4
3,0.568,0.940,1,-5.401,1,0.0411,0.000431,0.000050,0.0779,0.738,97.096,Eve,Father Of Mine,1
4,0.509,0.448,4,-7.552,0,0.0357,0.007130,0.013700,0.2440,0.221,156.909,Eve,I See Red,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11600,0.952,0.478,5,-8.520,0,0.3610,0.774000,0.000000,0.1120,0.568,118.008,Gunna,Drip or Drown,3
11601,0.773,0.422,1,-4.572,0,0.1870,0.007830,0.006930,0.1290,0.488,77.502,Gunna,pushin P (feat. Young Thug),4
11602,0.793,0.308,0,-10.414,0,0.0466,0.613000,0.000017,0.0953,0.460,135.143,Gunna,Banking On Me,2
11603,0.795,0.550,7,-5.704,0,0.0882,0.123000,0.000000,0.0873,0.152,119.975,Gunna,ca$h $hit,4


In [6]:
# scaling our existing df for function.
x = spot_df[['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']]
scaler = StandardScaler()

x_prep =scaler.fit_transform(x)

scaled_df = pd.DataFrame(x_prep, columns=x.columns)
scaled_df['song_name'] = spot_df['song_name']
scaled_df['artist'] = spot_df['artist']
scaled_df['cluster'] = spot_df['cluster']
scaled_df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,song_name,artist,cluster
0,0.559399,-0.315743,-1.450811,0.254141,0.670341,-0.537282,-0.646995,-0.101337,-0.662517,-0.669045,-1.294068,What It's Like,Eve,1
1,-0.042352,1.303863,-0.044333,1.175757,-1.491778,0.107190,-0.922561,-0.294303,-0.466466,0.153867,-1.031851,Kaikai Kitan,Eve,4
2,-0.701111,-0.658880,0.518257,-0.695538,-1.491778,-0.452948,-0.086736,-0.292582,0.901056,0.473207,0.240280,Hey Lover,Eve,4
3,-0.143699,1.436543,-1.169515,0.730985,0.670341,-0.401932,-0.962822,-0.294008,-0.715161,0.694287,-0.867956,Father Of Mine,Eve,1
4,-0.517419,-0.814436,-0.325629,0.223719,-1.491778,-0.458154,-0.939305,-0.212710,0.289907,-1.422358,1.265926,I See Red,Eve,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11600,2.288642,-0.677181,-0.044333,-0.004562,-1.491778,2.928705,1.752710,-0.294303,-0.508822,-0.001708,-0.121902,Drip or Drown,Gunna,3
11601,1.154816,-0.933390,-1.169515,0.926486,-1.491778,1.117105,-0.936848,-0.253030,-0.405956,-0.329235,-1.566989,pushin P (feat. Young Thug),Gunna,4
11602,1.281501,-1.454958,-1.450811,-0.451221,-1.491778,-0.344669,1.187537,-0.294203,-0.609874,-0.443870,0.489405,Banking On Me,Gunna,2
11603,1.294169,-0.347769,0.518257,0.659529,-1.491778,0.088449,-0.532556,-0.294303,-0.658282,-1.704850,-0.051727,ca$h $hit,Gunna,4


# Building the Song recommender

In [10]:
def recommend_song():
    # Getting the artist name and song id from input
    artist_name = input('Enter the artist name: ')
    song_name = input('Choose a song: ')
        
    try: # Adding some error handling if song not found
        query = f'track:{song_name} artist:{artist_name}'
        results = sp.search(q= query, limit=1)
        if results['tracks']['items']:
            track_id = results['tracks']['items'][0]['id']
        else:
            print(f"No track found for '{song_name}'")
            return  # Exit the function if no track is found
    
        # Checking if song is on the Billboard100
        if song_name.lower() in billboard100['song_name'].str.lower().values:
            number = billboard100[billboard100['song_name'].str.lower() == song_name.lower()]['number'].values[0]
            print(f'{song_name} is currently Number {number} on the Billboard100.')
        else:
            print(f'{song_name} is not currently on the Billboard100.')
    
        # Get song features with the id
        audio_features = sp.audio_features(track_id)
        df_ = pd.DataFrame(audio_features)
        new_features = df_[x.columns]
        
        # Scale our new features
        scaled_x = scaler.transform(new_features)
        
        #clustering our new features
        cluster = kmeans.predict(scaled_x)
        

        # Filter dataset to predicted cluster
        filtered_df = np.array(scaled_df[scaled_df['cluster'] == cluster[0]][x.columns], order="C")
        

        # Get closest song from filtered dataset
        closest, _ = pairwise_distances_argmin_min(scaled_x, filtered_df)

        
        # Return recommended song in a readable way
        recommended_song_name = scaled_df.loc[closest[0]]['song_name']
        recommended_artist = scaled_df.loc[closest[0]]['artist']
        print('\n[RECOMMENDED SONG]')
        # added ccluster song was in and closest for observation 
        print("Cluster:", cluster)
        print("Closest Song:", closest)
        return f'{recommended_song_name} - {recommended_artist}'
    
    except Exception as e:
        # Handle any other exceptions that may occur
        print(f"An error occurred: {e}")

In [14]:
recommend_song()



Enter the artist name: fall out boy
Choose a song: sugar were goin down
sugar were goin down is not currently on the Billboard100.

[RECOMMENDED SONG]
Cluster: [0]
Closest Song: [370]


'Can’t Forget You (feat. James Blunt) - James Blunt'