In [1]:
import requests 
from bs4 import BeautifulSoup 
import pandas as pd
import config
import spotipy
import numpy as np
import json
from spotipy.oauth2 import SpotifyClientCredentials
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import random
from IPython.display import IFrame


# Chart page that is scraped for the current hot songs.
urlHotList = "https://www.billboard.com/charts/hot-100/"

# Two independent, initially empty lists. parseHtmltoFindSongTitlesAndArtists()
# fills them with lower-cased titles and artists, one entry per chart row.
songTitlesFromHotList, songArtistsFromHotList = [], []

In [2]:
def parseHtmltoFindSongTitlesAndArtists(urlHotList):
    """Scrape the chart page and refresh the module-level title/artist lists.

    Downloads ``urlHotList`` and inspects every ``<li>`` element carrying the
    CSS class ``lrv-u-padding-l-1@mobile-max``. For each one the song title is
    read from its ``<h3 id="title-of-a-story">`` and the artist from its first
    ``<span>``; both strings are stripped and lower-cased.

    The results *replace* the contents of ``songTitlesFromHotList`` and
    ``songArtistsFromHotList`` (in place, so existing references stay valid).
    Calling the function again therefore does not pile up duplicate entries.

    Args:
        urlHotList: URL of the chart page to scrape.

    Returns:
        None. The two module-level lists are updated as a side effect.

    Raises:
        requests.exceptions.RequestException: if the page cannot be fetched,
            including when the request times out.
    """
    # A timeout keeps the notebook from hanging forever on a stalled request.
    page = requests.get(urlHotList, timeout=10)
    soup = BeautifulSoup(page.content, 'html.parser')
    hot_list = soup.find_all("li", class_="lrv-u-padding-l-1@mobile-max")

    titles = []
    artists = []
    for hot_song in hot_list:
        title_tag = hot_song.find('h3', id="title-of-a-story")
        artist_tag = hot_song.find('span')
        # find() returns None when the tag is absent; skip such rows instead
        # of failing on .get_text() and losing the whole chart.
        if title_tag is None or artist_tag is None:
            continue
        titles.append(title_tag.get_text(strip=True).lower())
        artists.append(artist_tag.get_text(strip=True).lower())

    # Slice assignment swaps the contents while keeping the same list objects,
    # and only happens once parsing succeeded, so both lists stay aligned.
    songTitlesFromHotList[:] = titles
    songArtistsFromHotList[:] = artists

In [3]:
def isSongInTheHotList(songName):
    """Tell whether ``songName`` is one of the scraped chart titles.

    The comparison is an exact match against ``songTitlesFromHotList``, whose
    entries are stored stripped and lower-cased, so the caller is expected to
    pass the name in the same form.

    Args:
        songName: Title to look for.

    Returns:
        True if the title occurs in ``songTitlesFromHotList``, else False.
    """
    occurrences = songTitlesFromHotList.count(songName)
    return occurrences > 0

In [4]:
def authSpotify():
    """Create the Spotify client shared by the lookup helpers.

    Builds a ``SpotifyClientCredentials`` auth manager from
    ``config.client_id`` and ``config.client_secret`` and stores the resulting
    ``spotipy.Spotify`` instance in the module-level name ``spotifySearch``.

    Returns:
        None. ``spotifySearch`` is (re)bound as a side effect.
    """
    global spotifySearch
    credentials = SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
    spotifySearch = spotipy.Spotify(auth_manager=credentials)

In [5]:
def getTrackIdBySongName(songName):
    """Return the Spotify id of the first search hit for ``songName``.

    Uses the module-level ``spotifySearch`` client, so ``authSpotify()`` must
    have been called beforehand.

    Args:
        songName: Free-text query passed to the Spotify search.

    Returns:
        The ``id`` field of the first item under ``result["tracks"]["items"]``.

    Raises:
        IndexError: if the search returns no tracks. The exception type is the
            same one the bare ``[0]`` lookup used to raise, but the message
            now names the query that produced no match.
    """
    result = spotifySearch.search(q=songName, limit=1)
    items = result["tracks"]["items"]

    if not items:
        raise IndexError("No Spotify track found for " + repr(songName))

    return items[0]["id"]

In [6]:
def convertTrackToAudioFeatureDataFrame(trackId):
    """Fetch a track's audio features and keep the feature columns used here.

    Calls ``spotifySearch.audio_features(trackId)``, wraps the response in a
    DataFrame and returns only the ten selected columns, in the order listed
    below.

    Args:
        trackId: Spotify track id to look up.

    Returns:
        A pandas DataFrame restricted to the selected audio-feature columns.
    """
    wanted_columns = [
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "duration_ms",
    ]
    features_frame = pd.DataFrame(spotifySearch.audio_features(trackId))
    return features_frame[wanted_columns]
    
    

In [7]:
def load(filename = "filename.pickle"):
    """Unpickle ``filename``, or hand back a fresh estimator if it is missing.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        The unpickled object when the file exists. If a ``FileNotFoundError``
        occurs, a new ``StandardScaler()`` is returned when ``filename`` is
        exactly ``"scaler.pickle"``, otherwise a new
        ``KMeans(random_state=1234)``.

    Note:
        ``pickle.load`` can execute arbitrary code, so only point this at
        files you trust.
    """
    try:
        with open(filename, "rb") as pickled_file:
            restored = pickle.load(pickled_file)
    except FileNotFoundError:
        # No saved object on disk: fall back to a brand-new estimator.
        restored = (
            StandardScaler()
            if filename == "scaler.pickle"
            else KMeans(random_state=1234)
        )
    return restored

In [8]:
def recommendSongFromSpotify(spotiftyTrackAudioFeatures):
    """Pick a random stored track from the cluster(s) of the given song.

    The features are transformed with the object from ``load("scaler.pickle")``
    and assigned to clusters by the object from ``load("kmeans.pickle")``.
    Rows of ``ClusteredAudioFeaturesFromSpotify.csv`` whose ``cluster`` value is
    among the predicted ones are kept, one of them is sampled at random, and
    its ``id`` is resolved through ``spotifySearch.track``.

    Args:
        spotiftyTrackAudioFeatures: DataFrame of audio features for the song
            being matched.

    Returns:
        Whatever ``spotifySearch.track`` returns for the sampled row's id.
    """
    scaled_values = load("scaler.pickle").transform(spotiftyTrackAudioFeatures)
    scaled_frame = pd.DataFrame(
        scaled_values,
        columns=spotiftyTrackAudioFeatures.columns,
    )
    predictions = load("kmeans.pickle").predict(scaled_frame)

    catalogue = pd.read_csv("ClusteredAudioFeaturesFromSpotify.csv")

    # Restrict the catalogue to rows sharing a cluster with the input song.
    wanted_clusters = np.unique(predictions)
    same_cluster = catalogue[catalogue['cluster'].isin(wanted_clusters)]

    # sample() with no arguments draws a single random row.
    chosen = same_cluster.sample()
    return spotifySearch.track(chosen.iloc[0]['id'])

    
    

In [9]:
# Ask for a song and normalise it the same way the scraped chart titles are
# stored (trimmed, lower-case) so the chart membership test can match.
songName = input('Please enter a song name to search\n')
songName = songName.strip().lower()

if not songName:
    # Blank input: skip the scrape and the Spotify lookup altogether.
    print("Please enter a non-empty song name.")
else:
    parseHtmltoFindSongTitlesAndArtists(urlHotList)

    if isSongInTheHotList(songName):
        # Recommend a different chart song than the one that was typed in.
        # Fall back to the full list only if nothing else is available.
        otherHotSongs = [title for title in songTitlesFromHotList if title != songName]
        print("Recommended song for you: "
              + random.choice(otherHotSongs or songTitlesFromHotList))
    else:
        authSpotify()
        trackId = getTrackIdBySongName(songName)
        spotiftyTrackAudioFeatures = convertTrackToAudioFeatureDataFrame(trackId)
        track = recommendSongFromSpotify(spotiftyTrackAudioFeatures)

        # Use the track's own artist list; the album-level list can name a
        # different credit (e.g. on compilations) per the Spotify track object.
        print('Recommended song for you: ' + track["name"]
              + ' by ' + track["artists"][0]["name"])

        # Embed the Spotify player for the recommended track in the output.
        display(IFrame(src="https://open.spotify.com/embed/track/" + track["id"],
                       width="320", height="80", frameborder="0",
                       allowtransparency="true", allow="encrypted-media",))
        

Please enter a song name to search
easy on me
Recommended song for you:Rollin' On by Jackie Venson
