In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import config
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
from IPython.display import IFrame

In [2]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=config.client_id, client_secret=config.client_secret))


In [3]:
def load(filename = "filename.pickle"): 
    try: 
        with open(filename, "rb") as f: 
            return pickle.load(f) 
        
    except FileNotFoundError: 
        print("File not found!") 

In [4]:
# Function for song recommendations.

def song_recommender():
    
    song_input = input("Please enter a name of a song: ").title()

    if song_input in hot_songs.values:
        print()
        print("Your song is in the hot songs list!")
        print("Here is another hot song you could listen: ")
        print()
        print(hot_songs.sample())
    
    else:
        input_song = sp.search(q = song_input, limit = 1, market = "GB")
        
        song_id = input_song["tracks"]["items"][0]["id"]
        
        input_song_features = sp.audio_features(input_song["tracks"]["items"][0]["id"] )
    
        input_song_features = pd.DataFrame(input_song_features)
        input_song_features_num = input_song_features._get_numeric_data()

        input_song_features_num = input_song_features_num.drop(columns = ["key", "mode", "time_signature"])

        model = load("scaler.pickle")
        model_kmeans = load("kmeans_9.pickle")

        input_song_scaled = model.transform(input_song_features_num)

        input_song_scaled = pd.DataFrame(input_song_scaled, columns = ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness",
                          "valence", "tempo", "duration_ms"])

        input_song_cluster = model_kmeans.predict(input_song_scaled)

        match_song = all_spotify[all_spotify["cluster"] == int(input_song_cluster)]

        song_suggestion = match_song.sample()

        suggestion = song_suggestion["id"].iloc[0]
        
        print()
        print("Here is another song you can listen! Check that out!")
        print()
        print(IFrame(src="https://open.spotify.com/embed/track/" + song_id,
                   width="320",
                   height="80",
                   frameborder="0",
                   allowtransparency="true",
                   allow="encrypted-media",
                  ))

        return IFrame(src="https://open.spotify.com/embed/track/" + suggestion,
                    width="320",
                    height="80",
                    frameborder="0",
                    allowtransparency="true",
                    allow="encrypted-media",
                    )

In [5]:
# Data needed for the functions.

#hot_songs = pd.read_csv('hot_songs.csv')
#spotify_songs = pd.read_csv('spotify_songs.csv')
#spotify_songs.drop(columns = ["Unnamed: 0"], inplace = True)

In [6]:
# load data
cluster_id=pd.read_csv('cluster_id.csv')

In [7]:
all_spotify = pd.read_csv('all_spotify.csv')
#all_spotify.drop(columns =["index", "Unnamed: 0", "Unnamed: 0.1"], inplace=True)

In [8]:
# Loading the dataset with hot songs
hot_songs = pd.read_csv('music_list.csv')
hot_songs.drop(columns =["Unnamed: 0"], inplace=True)

In [9]:
hot_songs

Unnamed: 0,title,artist_name
0,We Don't Talk About Bruno,"Carolina Gaitan, Mauro Castillo, Adassa, Rhenz..."
1,Do We Have A Problem?,Nicki Minaj X Lil Baby
2,Easy On Me,Adele
3,Heat Waves,Glass Animals
4,Stay,The Kid LAROI & Justin Bieber
...,...,...
95,Iffy,Chris Brown
96,When I'm Gone,Alesso / Katy Perry
97,Fair Trade,Drake Featuring Travis Scott
98,Megan's Piano,Megan Thee Stallion


In [10]:
all_spotify 

Unnamed: 0.1,Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,...,energy.1,loudness.1,speechiness.1,acousticness.1,instrumentalness.1,liveness.1,valence.1,tempo.1,duration_ms.1,cluster
0,0,0.522,0.817,0,-6.170,1,0.0512,0.32900,0.000002,0.0807,...,0.910733,0.401315,-0.448736,0.225551,-0.379572,-0.675984,0.177313,1.108334,-0.149095,3
1,1,0.533,0.776,8,-7.092,1,0.0847,0.15100,0.000087,0.1300,...,0.722733,0.207375,-0.113355,-0.392381,-0.379230,-0.315229,0.755771,-1.090022,-0.016738,4
2,2,0.692,0.651,9,-8.267,1,0.0324,0.29200,0.002410,0.1050,...,0.149560,-0.039784,-0.636950,0.097104,-0.369870,-0.498168,0.794335,-0.806895,0.546104,4
3,3,0.873,0.651,1,-4.179,1,0.0374,0.02510,0.000107,0.2130,...,0.149560,0.820118,-0.586893,-0.829447,-0.379147,0.292126,1.317091,-0.191845,-0.517180,4
4,4,0.682,0.605,8,-10.771,1,0.0403,0.18500,0.003260,0.0469,...,-0.061367,-0.566494,-0.557860,-0.274349,-0.366446,-0.923317,1.685590,0.202775,-0.613792,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9861,9861,0.585,0.776,5,-5.898,1,0.0459,0.05720,0.000008,0.3000,...,0.722733,0.458530,-0.501796,-0.718011,-0.379547,0.928752,-0.401146,-1.524831,0.405954,0
9862,9862,0.377,0.681,7,-8.039,1,0.0298,0.00088,0.002300,0.0504,...,0.287122,0.008175,-0.662980,-0.913527,-0.370313,-0.897706,-1.009599,-0.415588,0.972323,0
9863,9863,0.601,0.794,7,-5.844,0,0.0671,0.00987,0.000001,0.4180,...,0.805270,0.469889,-0.289555,-0.882318,-0.379572,1.792222,-1.185279,-0.225553,0.023858,5
9864,9864,0.588,0.670,0,-6.439,1,0.0473,0.01040,0.000003,0.3380,...,0.236683,0.344732,-0.487780,-0.880478,-0.379565,1.206819,-0.066926,-0.078155,0.317652,0


In [11]:
# Testing the function.

song_recommender()

Please enter a name of a song: La vita e bella

Here is another song you can listen! Check that out!

<IPython.lib.display.IFrame object at 0x7fabaa3f3d60>


## Here is all the code I used for building the function.

In [12]:
# User input and checking if the song is in the hot list.

song_input=input("Please enter a name of a song: ").title()

if song_input in hot_songs.values:
    print()
    print("Your song is in the hot songs list!")
    print("Here is one song recommmendation to listen: ")
    print()
    print(hot_songs.sample())

Please enter a name of a song: by by


## Get the futures of the input song

In [13]:
input_song=sp.search(q=song_input, limit=1, market="GB")

In [14]:
input_song

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=By+By&type=track&market=GB&offset=0&limit=1',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/7hJcb9fa4alzcOq3EaNPoG'},
       'href': 'https://api.spotify.com/v1/artists/7hJcb9fa4alzcOq3EaNPoG',
       'id': '7hJcb9fa4alzcOq3EaNPoG',
       'name': 'Snoop Dogg',
       'type': 'artist',
       'uri': 'spotify:artist:7hJcb9fa4alzcOq3EaNPoG'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/61qj9MgqlVi0xzi55mHZiX'},
     'href': 'https://api.spotify.com/v1/albums/61qj9MgqlVi0xzi55mHZiX',
     'id': '61qj9MgqlVi0xzi55mHZiX',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b2739a23b150cea8488e1ea31b53',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e029a23b150cea8488e1ea31b53',
       'width': 300},
      {'height': 64,
       'url': 'https://i.scdn.

In [15]:
# Figuring out how to get song id-s.

song_id = input_song["tracks"]["items"][0]["id"]

In [16]:
# Getting audio features of input song.

input_song_features = sp.audio_features(input_song["tracks"]["items"][0]["id"] )

In [17]:
input_song_features

[{'danceability': 0.587,
  'energy': 0.838,
  'key': 5,
  'loudness': -2.505,
  'mode': 1,
  'speechiness': 0.281,
  'acousticness': 0.125,
  'instrumentalness': 0,
  'liveness': 0.213,
  'valence': 0.457,
  'tempo': 75.681,
  'type': 'audio_features',
  'id': '2r3VvWz6pQT7lx5lxpKoxY',
  'uri': 'spotify:track:2r3VvWz6pQT7lx5lxpKoxY',
  'track_href': 'https://api.spotify.com/v1/tracks/2r3VvWz6pQT7lx5lxpKoxY',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/2r3VvWz6pQT7lx5lxpKoxY',
  'duration_ms': 164650,
  'time_signature': 4}]

In [18]:
# Getting only numerical data and putting it into a dataframe.

input_song_features = pd.DataFrame(input_song_features)
input_song_features_num = input_song_features._get_numeric_data()


In [19]:
# Dropping unnecessary columns.

input_song_features_num = input_song_features_num.drop(columns = ["key", "mode", "time_signature"])

In [20]:
# Checking the result.

input_song_features_num

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.587,0.838,-2.505,0.281,0.125,0,0.213,0.457,75.681,164650


In [21]:
# Loading my model

model = load("scaler.pickle")

In [22]:
model_kmeans = load("kmeans_9.pickle")

In [23]:
# Scaling the audio features of input songs using the same model tranform as I used for my spotify songs.

input_song_scaled = model.transform(input_song_features_num)

In [24]:
# Making a dataframe.

input_song_scaled = pd.DataFrame(input_song_scaled, columns = ["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness",
                          "valence", "tempo", "duration_ms"])

In [25]:
# Predicting to which cluster the input song belongs to.

input_song_cluster = model_kmeans.predict(input_song_scaled)

In [26]:
input_song_cluster

array([8], dtype=int32)

In [27]:
model_kmeans

KMeans(n_clusters=9, random_state=1234)

## Recommending a new song

In [28]:
# Compare the cluster nr of the input song to my song df and return a new song from the same cluster nr.

match_song = all_spotify[all_spotify["cluster"] == int(input_song_cluster)]

In [29]:
song_suggestion = match_song.sample()

In [30]:
# Getting the id of the input song.

suggestion = song_suggestion["id"].iloc[0]

In [31]:
suggestion

'4LvRT9c5IKxC8GHVPAAaHb'

In [32]:
IFrame(src="https://open.spotify.com/embed/track/" + song_id,
                   width="320",
                   height="80",
                   frameborder="0",
                   allowtransparency="true",
                   allow="encrypted-media",
                  )