# The code for the "song recommender" app

In [1]:
# import relevant libraries
import requests # to download html code
from bs4 import BeautifulSoup # to navigate through the html code
import pandas as pd
import numpy as np
import re
import random
import config

In [2]:
# Load the top-100 chart CSV for a quick look at its contents.
# NOTE(review): the preview below shows two leftover index columns ("Unnamed: 0.1" and
# "Unnamed: 0"), presumably because the file was saved with its index more than once —
# consider re-saving it with index=False or reading it with usecols=["Artist", "Song"].
top_100 = pd.read_csv("top_100.csv", index_col=False)

In [3]:
top_100.head()

Unnamed: 0.1,Unnamed: 0,Artist,Song
0,0,Miley Cyrus,Flowers
1,1,SZA,Kill Bill
2,2,Morgan Wallen,Last Night
3,3,Sam Smith & Kim Petras,Unholy
4,4,"Metro Boomin, The Weeknd & 21 Savage",Creepin'


In [4]:
# writing the song recommender function
def song_recommender():
    """Ask the user for a song and check it against the top-100 chart.

    Reads ``top_100.csv`` (which must provide ``Song`` and ``Artist`` columns),
    prompts for a song title on stdin and compares it with the chart titles,
    ignoring case and surrounding whitespace.

    If the title is in the chart, a randomly chosen *different* chart entry is
    printed as a recommendation. If it is not in the chart, only the
    "not in the current top-100" message is printed.

    Returns:
        None. All results are written to stdout.
    """
    top_100 = pd.read_csv("top_100.csv")
    user_input = input("Please type in your favorite song: ")

    wanted_title = user_input.strip().lower()
    chart_titles = top_100["Song"].astype(str).str.strip().str.lower()
    is_user_song = chart_titles == wanted_title

    if is_user_song.any():
        print("Your song is in the current top-100.")
        # Recommend something other than what the user just typed; only fall back
        # to the whole chart when it contains nothing else.
        candidates = top_100.loc[~is_user_song]
        if candidates.empty:
            candidates = top_100
        # Pick a whole row so the song and its artist always belong together,
        # even when the same title appears more than once in the chart.
        pick = candidates.iloc[random.randrange(len(candidates))]
        print(f"\nHere is another song from the top-100 you might like: {pick['Song']} by {pick['Artist']}")
    else:
        # NOTE(review): no suggestion is actually produced in this branch yet;
        # presumably the cluster-based recommender built further down in the
        # notebook is meant to be called from here — confirm.
        print("Your song is not in the current top-100. Let me give you a suggestion for another song.")

In [5]:
# Keep the app running until the user explicitly answers "no".
while True:
    song_recommender()
    # An invalid answer re-asks the yes/no question instead of jumping straight to
    # another song prompt. Answers are compared ignoring case and surrounding whitespace.
    user_input = input("\nDo you want to input another song? yes/no: ").strip().lower()
    while user_input not in ("yes", "no"):
        print("Please write either 'yes' or 'no'")
        user_input = input("\nDo you want to input another song? yes/no: ").strip().lower()
    if user_input == "no":
        print("Your session has ended. Thank you for using our app!")
        break

Please type in your favorite song: Flowers
Your song is in the current top-100.

Here is another song from the top-100 you might like: Tennessee Fan by Morgan Wallen

Do you want to input another song? yes/no: no
Your session has ended. Thank you for using our app!


# Creating the song database with audio features

In [6]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials


# initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

In [7]:
# checking a playlist
playlist = sp.user_playlist_tracks("spotify", "27gN69ebwiJRtXEboL12Ih")
playlist["items"][0]

{'added_at': '2022-11-26T02:31:48Z',
 'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/celsum76'},
  'href': 'https://api.spotify.com/v1/users/celsum76',
  'id': 'celsum76',
  'type': 'user',
  'uri': 'spotify:user:celsum76'},
 'is_local': False,
 'primary_color': None,
 'track': {'album': {'album_type': 'album',
   'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/5M52tdBnJaKSvOpJGz8mfZ'},
     'href': 'https://api.spotify.com/v1/artists/5M52tdBnJaKSvOpJGz8mfZ',
     'id': '5M52tdBnJaKSvOpJGz8mfZ',
     'name': 'Black Sabbath',
     'type': 'artist',
     'uri': 'spotify:artist:5M52tdBnJaKSvOpJGz8mfZ'}],
   'available_markets': [],
   'external_urls': {'spotify': 'https://open.spotify.com/album/1JA2UhLRbFRkmoh6Lz64KH'},
   'href': 'https://api.spotify.com/v1/albums/1JA2UhLRbFRkmoh6Lz64KH',
   'id': '1JA2UhLRbFRkmoh6Lz64KH',
   'images': [{'height': 640,
     'url': 'https://i.scdn.co/image/ab67616d0000b2738c9131b3b0f5a4b9fdc5a0ac',
    

In [8]:
# function that retrieves the songs from a playlist
def get_playlist_tracks(username, playlist_id):
    """Return all track items of a playlist, following pagination.

    Fetches the first page through the module-level ``sp`` client and keeps
    requesting the next page for as long as the current page reports one,
    appending every page's ``items`` to the list from the first page.

    Args:
        username: Passed through to ``sp.user_playlist_tracks``.
        playlist_id: Id of the playlist to read.

    Returns:
        list: The ``items`` entries of all pages, in page order.
    """
    # NOTE(review): spotipy appears to document user_playlist_tracks as deprecated
    # in favour of playlist_items — confirm against the installed version.
    page = sp.user_playlist_tracks(username, playlist_id)
    collected = page['items']
    while True:
        if not page['next']:
            return collected
        page = sp.next(page)
        collected += page['items']

In [9]:
# Spotify playlist ids to harvest, one per line.
# genres included: metal, guilty pleasures, jazz, reggae, dance, rock, indie, rap/hiphop, chill, classic, techno, dutch, latin, reggaeton, punk, folk/acoustic, soul, blues, funk, live music
playlist_ids = [
    "27gN69ebwiJRtXEboL12Ih",
    "37i9dQZF1DX4pUKG1kS0Ac",
    "37i9dQZF1DXbITWG1ZJKYt",
    "37i9dQZF1DXbSbnqxMTGx9",
    "37i9dQZF1DX8a1tdzq5tbM",
    "37i9dQZF1EQpj7X7UK8OOF",
    "37i9dQZF1EQqkOPvHGajmW",
    "17IFbN8moTMWsaK5S5qCyD",
    "5i1mrZvsI0IertAQSeveAL",
    "42eLtt8RUBboyXLmNQWU5a",
    "1h0CEZCm6IbFTbxThn6Xcs",
    "1yswYW9ONg19LAlBOv87qq",
    "4hKNssGmIVEcWu1MF7HzgN",
    "32BC2f8hmAUmvsQgr9Mzsm",
    "37i9dQZF1DX10zKzsJ2jva",
    "37i9dQZF1DWY7IeIP1cdjF",
    "37i9dQZF1DX3MU5XUozve7",
    "37i9dQZF1DWYV7OOaGhoH0",
    "37i9dQZF1DX9XIFQuFvzM4",
    "37i9dQZF1DX0QNpebF7rcL",
    "37i9dQZF1DWVJ0TKGKfzgP",
    "37i9dQZF1DX5uWybZqQtdt",
]

# Collect the track items of every playlist into one flat list.
tracks = []
for playlist_id in playlist_ids:
    tracks.extend(get_playlist_tracks("spotify", playlist_id))

In [10]:
# checking how many track items were collected (tracks is a plain Python list at this point)
print(len(tracks))

3219


In [None]:
# getting the audio features
# The audio-features endpoint accepts up to 100 track ids per request, so the ids are
# sent in batches instead of issuing one request per track.
# Playlist entries that carry no track object or no track id are skipped (presumably
# removed or local tracks), because there is nothing to look up for them.
track_ids = [
    item["track"]["id"]
    for item in tracks
    if item.get("track") and item["track"].get("id")
]

list_of_audio_features = []
for start in range(0, len(track_ids), 100):
    batch = sp.audio_features(track_ids[start:start + 100]) or []
    # Tracks without available features come back as None; they are dropped so that
    # every remaining entry is a dict that can become a row of the feature table.
    list_of_audio_features.extend(features for features in batch if features is not None)

In [None]:
print(len(tracks))

In [None]:
# Turn the collected audio features into a table and keep only the columns used
# further down: the numeric features plus the track id and duration.
songs_df = pd.DataFrame(list_of_audio_features).loc[
    :,
    [
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "id",
        "duration_ms",
    ],
]

songs_df

# Scaling the audio features of the song database and requesting the user's song

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
import numpy as np

In [None]:
# Numeric feature columns that are passed to the scaler and the clustering model.
# NOTE(review): "instrumentalness" is kept in songs_df but is not selected here —
# confirm that leaving it out of the clustering is intentional.
audio_features = songs_df[["danceability", "energy", "loudness", "speechiness", "acousticness", "liveness", "valence", "tempo"]]

In [None]:
audio_features.head()

In [None]:
# Fit the scaler on the song database, then standardise those same features with it.
# fit() returns the scaler itself, so the two steps are chained.
scaler = StandardScaler()
audio_features_scaled = scaler.fit(audio_features).transform(audio_features)
audio_features_scaled

In [None]:
# Cluster the scaled songs into 8 groups; the fixed random_state keeps the run repeatable.
kmeans = KMeans(n_clusters=8, random_state=1234)
kmeans.fit(audio_features_scaled)
# assign a cluster to each example
labels = kmeans.predict(audio_features_scaled)
# retrieve unique clusters
clusters = np.unique(labels)

# Scatter the first two (unscaled) feature columns, one series per cluster.
# The axis labels are read from the plotted columns themselves so they always
# describe what is actually on each axis.
feature_values = audio_features.to_numpy()
x_name, y_name = audio_features.columns[:2]
for cluster in clusters:
    # boolean mask selecting the rows that were assigned to this cluster
    in_cluster = labels == cluster
    pyplot.scatter(feature_values[in_cluster, 0], feature_values[in_cluster, 1], label=f"cluster {cluster}")
pyplot.xlabel(x_name)
pyplot.ylabel(y_name)
pyplot.legend()
# show the plot
pyplot.show()

In [None]:
# Per-song cluster labels, one entry per row of audio_features_scaled.
# This rebinds `clusters`: the plotting cell above used the name for the unique
# cluster ids, while later cells add this per-row version as a column.
clusters = kmeans.predict(audio_features_scaled)
# number of songs per cluster, ordered by cluster id
pd.Series(clusters).value_counts().sort_index()

In [None]:
# Save the fitted scaler and the fitted k-means model to pickle files.
import pickle

for pickle_path, fitted_object in (
    ("scaler_song_recommender.pickle", scaler),
    ("kmeans_song_recommender.pickle", kmeans),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(fitted_object, f)

In [None]:
# getting the scaled songs back into a Pandas dataframe
scaled_songs_df = pd.DataFrame(audio_features_scaled, columns=["danceability", "energy", "loudness", "speechiness", "acousticness", "liveness", "valence", "tempo"])
scaled_songs_df.head()

In [None]:
scaled_songs_df

# Scaling the audio features of the song provided by the user

In [None]:
# searching Spotify for the song provided by the user (only the top hit is requested)
# todo: add artist to be more specific
user_song = "Anyway the wind blows"
result = sp.search(q=user_song,limit=1)

In [None]:
# getting the id of a song
result["tracks"]["items"][0]["id"]

In [None]:
# getting the audio features of a song using the song id
user_song_audio_features = sp.audio_features(result["tracks"]["items"][0]["id"])
user_song_audio_features

In [None]:
# Put the user song's audio features into a one-row table with the same column
# selection as the song database.
user_song_df = pd.DataFrame(user_song_audio_features).loc[
    :,
    [
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
        "id",
        "duration_ms",
    ],
]

user_song_df

In [None]:
user_song_audio_features = user_song_df[["danceability", "energy", "loudness", "speechiness", "acousticness", "liveness", "valence", "tempo"]]

In [None]:
# scaling the song provided by the user
user_song_audio_features_scaled = scaler.transform(user_song_audio_features)
user_song_audio_features_scaled

In [None]:
# use kmeans.predict() to predict what cluster the song by user belongs to
# predict() returns an array with one label per input row. The user song is a single
# row, so element 0 is taken explicitly instead of calling int() on the whole array
# (NumPy deprecates converting arrays with ndim > 0 to Python scalars).
label = int(kmeans.predict(user_song_audio_features_scaled)[0])

In [None]:
# including the "clusters", "id" and "duration_ms" columns to the scaled_songs_df
# Copies of these columns left over from an earlier run of this cell are dropped first,
# so the cell can be re-run without join() failing on overlapping column names.
scaled_songs_df = (
    scaled_songs_df
    .drop(columns=["clusters", "id", "duration_ms"], errors="ignore")
    .assign(clusters=clusters)
    # join aligns on the row index, which both frames share
    .join(songs_df[["id", "duration_ms"]])
)
scaled_songs_df

In [None]:
# saving a csv file of the scaled_songs_df
scaled_songs_df.to_csv("scaled_songs_df.csv", index=False)

In [None]:
# Wrap the scaled user-song features in a table and attach the song's "id" and
# "duration_ms" columns from user_song_df.
scaled_user_song_df = pd.DataFrame(
    user_song_audio_features_scaled,
    columns=[
        "danceability",
        "energy",
        "loudness",
        "speechiness",
        "acousticness",
        "liveness",
        "valence",
        "tempo",
    ],
).join(user_song_df[["id", "duration_ms"]])
scaled_user_song_df.head()

In [None]:
scaled_songs_df.head()

In [None]:
# creating a sample dataframe based on matching the clusters (could probably be done more efficiently)
sample_df = scaled_songs_df.loc[scaled_songs_df["clusters"]== label]
sample_df.head()

In [None]:
# taking the sample song
rec_song_df = sample_df.sample(1)
rec_song_df

In [None]:
# isolate track_id
# Take the value itself rather than the Series' printed form: to_string() is a display
# formatter and may pad its output with whitespace in some pandas versions, which would
# corrupt the embed URL built from this id.
rec_song_id = rec_song_df["id"].iloc[0]
rec_song_id

In [None]:
# embed the Spotify player for the recommended track (rec_song_id)
from IPython.display import IFrame

# NOTE(review): IPython's IFrame appears to append extra keyword arguments to the URL
# as query parameters rather than emitting them as HTML attributes — confirm that
# frameborder / allowtransparency / allow have the intended effect.
embed_url = "https://open.spotify.com/embed/track/" + rec_song_id
IFrame(
    src=embed_url,
    width="320",
    height="80",
    frameborder="0",
    allowtransparency="true",
    allow="encrypted-media",
)