## Spotify song recommender

In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import config
from IPython.display import IFrame

In [2]:
# Connecting to spotify.

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

In [3]:
# Function to load my previously saved model.

def load(filename = "filename.pickle"): 
    try: 
        with open(filename, "rb") as f: 
            return pickle.load(f) 
        
    except FileNotFoundError: 
        print("File not found!") 

In [37]:
# Function for song recommendations.

def song_recommender():
    
    song_input = input("Please enter a name of a song: ").title()

    if song_input in hot_songs.values:
        print()
        print("Your song is in the hot songs list!")
        print("Here is another hot song you could listen: ")
        print()
        print(hot_songs.sample())
    
    else:
        input_song = sp.search(q = song_input, limit = 1, market = "GB")
        
        song_id = input_song["tracks"]["items"][0]["id"]
        
        input_song_features = sp.audio_features(input_song["tracks"]["items"][0]["id"] )
    
        input_song_features = pd.DataFrame(input_song_features)
        input_song_features_num = input_song_features._get_numeric_data()

        input_song_features_num = input_song_features_num.drop(columns = ["key", "mode", "time_signature"])

        model = load("scaler.pickle")
        model_kmeans = load("kmeans_9.pickle")

        input_song_scaled = model.transform(input_song_features_num)

        input_song_scaled = pd.DataFrame(input_song_scaled, columns = ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness",
                          "valence", "tempo", "duration_ms"])

        input_song_cluster = model_kmeans.predict(input_song_scaled)

        match_song = spotify_songs[spotify_songs["cluster"] == int(input_song_cluster)]

        song_suggestion = match_song.sample()

        suggestion = song_suggestion["id"].iloc[0]
        
        print()
        print("Here is another song you can listen! Check that out!")
        print()
        print(IFrame(src="https://open.spotify.com/embed/track/" + song_id,
                   width="320",
                   height="80",
                   frameborder="0",
                   allowtransparency="true",
                   allow="encrypted-media",
                  ))

        return IFrame(src="https://open.spotify.com/embed/track/" + suggestion,
                    width="320",
                    height="80",
                    frameborder="0",
                    allowtransparency="true",
                    allow="encrypted-media",
                    )

In [35]:
# Data needed for the functions.

hot_songs = pd.read_csv('hot_songs.csv')
spotify_songs = pd.read_csv('spotify_songs.csv')
spotify_songs.drop(columns = ["Unnamed: 0"], inplace = True)

In [36]:
spotify_songs

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,energy.1,loudness.1,speechiness.1,acousticness.1,instrumentalness.1,liveness.1,valence.1,tempo.1,duration_ms.1,cluster
0,0.522,0.8170,0,-6.170,1,0.0512,0.32900,0.000002,0.0807,0.562,...,1.022293,0.471670,-0.421990,0.112103,-0.415597,-0.661491,0.143928,1.109252,-0.116854,7
1,0.692,0.6510,9,-8.267,1,0.0324,0.29200,0.002410,0.1050,0.706,...,0.278965,0.067342,-0.621782,-0.010085,-0.406758,-0.483231,0.748429,-0.796069,0.545346,3
2,0.682,0.6050,8,-10.771,1,0.0403,0.18500,0.003260,0.0469,0.914,...,0.072982,-0.415460,-0.537827,-0.363440,-0.403638,-0.909441,1.621597,0.208378,-0.559493,3
3,0.793,0.6010,4,-8.179,1,0.0317,0.03120,0.003510,0.1050,0.703,...,0.055070,0.084309,-0.629221,-0.871346,-0.402720,-0.483231,0.735835,-0.614074,0.025381,3
4,0.873,0.6510,1,-4.179,1,0.0374,0.02510,0.000107,0.2130,0.828,...,0.278965,0.855559,-0.568646,-0.891491,-0.415211,0.309034,1.260576,-0.184201,-0.467468,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7377,0.628,0.7370,7,-4.437,1,0.0410,0.06250,0.000000,0.0777,0.592,...,0.664062,0.805814,-0.530388,-0.767982,-0.415603,-0.683499,0.269866,-1.025927,-0.375787,2
7378,0.784,0.5390,8,-8.148,0,0.0628,0.69000,0.000017,0.0367,0.697,...,-0.222558,0.090287,-0.298715,1.304262,-0.415539,-0.984266,0.710648,0.495091,0.283185,3
7379,0.410,0.0485,0,-13.918,1,0.0380,0.94700,0.000000,0.1080,0.247,...,-2.418959,-1.022241,-0.562269,2.152973,-0.415603,-0.461224,-1.178418,0.638645,-0.535392,1
7380,0.854,0.6740,11,-4.896,1,0.0705,0.02040,0.000000,0.1010,0.818,...,0.381956,0.717313,-0.216886,-0.907012,-0.415603,-0.512575,1.218597,0.204258,-0.035757,3


In [38]:
# Testing the function.

song_recommender()

Please enter a name of a song: crazy in love

Here is another song you can listen! Check that out!

<IPython.lib.display.IFrame object at 0x16cc8fb50>


### Here is all the code I used for building the function.

In [70]:
# User input and checking if the song is in the hot list.

song_input = input("Please enter a name of a song: ").title()

if song_input in hot_songs.values:
    print()
    print("Your song is in the hot songs list!")
    print("Here is another song you could listen: ")
    print()
    print(hot_songs.sample())


Please enter a name of a song: back in black


#### Getting the features of the input song

In [71]:
input_song = sp.search(q = song_input, limit = 1, market = "GB")

In [72]:
input_song

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=Back+In+Black&type=track&market=GB&offset=0&limit=1',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/711MCceyCBcFnzjGY4Q7Un'},
       'href': 'https://api.spotify.com/v1/artists/711MCceyCBcFnzjGY4Q7Un',
       'id': '711MCceyCBcFnzjGY4Q7Un',
       'name': 'AC/DC',
       'type': 'artist',
       'uri': 'spotify:artist:711MCceyCBcFnzjGY4Q7Un'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/6mUdeDZCsExyJLMdAfDuwh'},
     'href': 'https://api.spotify.com/v1/albums/6mUdeDZCsExyJLMdAfDuwh',
     'id': '6mUdeDZCsExyJLMdAfDuwh',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b2730b51f8d91f3a21e8426361ae',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e020b51f8d91f3a21e8426361ae',
       'width': 300},
      {'height': 64,
       'url': 'https://i.sc

In [73]:
# Figuring out how to get song id-s.

song_id = input_song["tracks"]["items"][0]["id"]

In [74]:
# Getting audio features of input song.

input_song_features = sp.audio_features(input_song["tracks"]["items"][0]["id"] )

In [75]:
# Getting only numerical data and putting it into a dataframe.

input_song_features = pd.DataFrame(input_song_features)
input_song_features_num = input_song_features._get_numeric_data()


In [76]:
# Dropping unnecessary columns.

input_song_features_num = input_song_features_num.drop(columns = ["key", "mode", "time_signature"])

In [77]:
# Checking the result.

input_song_features_num

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.31,0.7,-5.678,0.047,0.011,0.00965,0.0828,0.763,188.386,255493


In [78]:
# Loading my model

model = load("scaler.pickle")

In [81]:
model_kmeans = load("kmeans_9.pickle")

In [79]:
# Scaling the audio features of input songs using the same model tranform as I used for my spotify songs.

input_song_scaled = model.transform(input_song_features_num)

In [80]:
# Making a dataframe.

input_song_scaled = pd.DataFrame(input_song_scaled, columns = ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness",
                          "valence", "tempo", "duration_ms"])

In [82]:
# Predicting to which cluster the input song belongs to.

input_song_cluster = model_kmeans.predict(input_song_scaled)

In [83]:
input_song_cluster

array([7], dtype=int32)

#### Recommending a new song

In [22]:
# Compare the cluster nr of the input song to my song df and return a new song from the same cluster nr.

match_song = spotify_songs[spotify_songs["cluster"] == int(input_song_cluster)]

In [23]:
song_suggestion = match_song.sample()

In [24]:
# Getting the id of the input song.

suggestion = song_suggestion["id"].iloc[0]

In [25]:
suggestion

'5eaVYUT0pGYVGoKIaCcrfx'

In [85]:
IFrame(src="https://open.spotify.com/embed/track/" + song_id,
                   width="320",
                   height="80",
                   frameborder="0",
                   allowtransparency="true",
                   allow="encrypted-media",
                  )