# Spotify recommender

In [22]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [23]:
# Read the chart list and drop the 'Unnamed: 0' column
# (presumably an index that was written into the CSV -- confirm).
top100 = pd.read_csv("top100.csv").drop(columns='Unnamed: 0')

top100

Unnamed: 0,title,artist
0,Stay Alive (Prod. by SUGA of BTS),Jung Kook
1,The Next Episode (feat. Snoop Dogg),Dr. Dre
2,Still D.R.E. (feat. Snoop Dogg),Dr. Dre
3,Lose Yourself,Eminem
4,Family Affair,Mary J. Blige
...,...,...
95,Beggin,Måneskin
96,Nuthin' but a G Thang (feat. Snoop Dogg),Dr. Dre
97,Boyfriend,Dove Cameron
98,Shake That (feat. Nate Dogg),Eminem


In [24]:
# Add lower-case copies of title and artist so a song is found regardless of letter case.
# The vectorised .str accessor propagates missing values as NaN instead of raising,
# which the previous per-element lambda did on any non-string entry.
top100['title_lower'] = top100['title'].str.lower()
top100['artist_lower'] = top100['artist'].str.lower()

top100

Unnamed: 0,title,artist,title_lower,artist_lower
0,Stay Alive (Prod. by SUGA of BTS),Jung Kook,stay alive (prod. by suga of bts),jung kook
1,The Next Episode (feat. Snoop Dogg),Dr. Dre,the next episode (feat. snoop dogg),dr. dre
2,Still D.R.E. (feat. Snoop Dogg),Dr. Dre,still d.r.e. (feat. snoop dogg),dr. dre
3,Lose Yourself,Eminem,lose yourself,eminem
4,Family Affair,Mary J. Blige,family affair,mary j. blige
...,...,...,...,...
95,Beggin,Måneskin,beggin,måneskin
96,Nuthin' but a G Thang (feat. Snoop Dogg),Dr. Dre,nuthin' but a g thang (feat. snoop dogg),dr. dre
97,Boyfriend,Dove Cameron,boyfriend,dove cameron
98,Shake That (feat. Nate Dogg),Eminem,shake that (feat. nate dogg),eminem


In [25]:
topsong = input("Enter a song or artist you like : ")
# Strip surrounding whitespace so " Eminem " still matches and a
# whitespace-only answer is treated as empty input.
topsong = topsong.strip().lower()

# Look for input in list and give random output
if topsong == "":
    print('Please give your input')
elif (top100['title_lower'].eq(topsong) | top100['artist_lower'].eq(topsong)).any():
    # A match on either title or artist yields the same random sample of chart songs.
    print(top100[['title','artist']].sample(3))
else:
    print('No item found in list')

Enter a song or artist you like : a
No item found in list


In [26]:
# Load the clustered song table and keep only the columns needed for display.
spot = pd.read_csv("spot_cluster.csv")

pred_columns = ['name', 'artist', 'clusters']
spot_pred = spot.loc[:, pred_columns]

spot_pred

Unnamed: 0,name,artist,clusters
0,You Know How We Do It,Ice Cube,1
1,Carlito Marron,Carlinhos Brown,1
2,Coastline,Hollow Coves,2
3,For What It's Worth,Various Artists,2
4,Higher Love,Kygo,1
...,...,...,...
2536,Sunshine Of Your Love,Cream,2
2537,Anarchy In The UK,Sex Pistols,1
2538,I Want To Hold Your Hand - Remastered 2015,The Beatles,1
2539,Kissing a Fool,George Michael,2


## Connecting to the Spotify API

In [27]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Initialize SpotiPy with the client credentials stored in SpotifySecret.txt.
# Each non-empty line of that file is read as "key:value"; the keys 'cid'
# (client id) and 'cs' (client secret) are the ones used below.
secrets_dict = {}
with open("SpotifySecret.txt", "r") as secrets_file:  # closed automatically
    for line in secrets_file:
        line = line.strip()
        if not line:
            continue
        # Split on the first ':' only, so a value that itself contains ':' is kept whole,
        # and trim whitespace so "cid: abc" yields 'abc' rather than ' abc'.
        key, _, value = line.partition(':')
        secrets_dict[key.strip()] = value.strip()

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['cs']))

In [28]:
topsong = input("Enter a song or artist you like : ")
# Strip surrounding whitespace so " Eminem " still matches and a
# whitespace-only answer is treated as empty input.
topsong = topsong.strip().lower()

# Look for input in list and give random output
if topsong == "":
    print('Please give your input')
elif (top100['title_lower'].eq(topsong) | top100['artist_lower'].eq(topsong)).any():
    # gives random sample if match on title or on artist
    print(top100[['title','artist']].sample(3))
else:
    print('No item found in list')

Enter a song or artist you like : a
No item found in list


In [29]:
# Search Spotify for the user's input, asking for a single track result.

TOPSONG_RES = sp.search(q=topsong, type='track', limit=1)

TOPSONG_RES

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=a&type=track&offset=0&limit=1',
  'items': [{'album': {'album_type': 'single',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/2VSHKHBTiXWplO8lxcnUC9'},
       'href': 'https://api.spotify.com/v1/artists/2VSHKHBTiXWplO8lxcnUC9',
       'id': '2VSHKHBTiXWplO8lxcnUC9',
       'name': 'GAYLE',
       'type': 'artist',
       'uri': 'spotify:artist:2VSHKHBTiXWplO8lxcnUC9'}],
     'available_markets': ['AD',
      'AE',
      'AG',
      'AL',
      'AM',
      'AO',
      'AR',
      'AT',
      'AU',
      'AZ',
      'BA',
      'BB',
      'BD',
      'BE',
      'BF',
      'BG',
      'BH',
      'BI',
      'BJ',
      'BN',
      'BO',
      'BR',
      'BS',
      'BT',
      'BW',
      'BY',
      'BZ',
      'CA',
      'CD',
      'CG',
      'CH',
      'CI',
      'CL',
      'CM',
      'CO',
      'CR',
      'CV',
      'CW',
      'CY',
      'CZ',
      'DE',
      'DJ',
      'D

In [30]:
# find and store variables of song

TOPSONG_ITEMS = TOPSONG_RES['tracks']['items']
if not TOPSONG_ITEMS:
    # Same exception type as indexing an empty list, but with a message that
    # tells the user what actually went wrong.
    raise IndexError(f"Spotify returned no tracks for {topsong!r}")
TOPSONG_ID = TOPSONG_ITEMS[0]['id']


def getTrackFeatures(id):
    """Collect the numeric features of one Spotify track as a flat list.

    Parameters
    ----------
    id
        Track identifier passed unchanged to ``sp.track`` and
        ``sp.audio_features``.

    Returns
    -------
    list
        ``[length, popularity, acousticness, danceability, energy,
        instrumentalness, liveness, loudness, speechiness, tempo,
        time_signature]`` in exactly that order, where ``length`` is the
        track's ``duration_ms``.

    Raises
    ------
    ValueError
        If no audio features are returned for the track.
    """
    track_info = sp.track(id)
    features_info = sp.audio_features(id)

    # audio_features answers with a list; guard against an empty list or a
    # None entry so the failure is explicit instead of a TypeError below.
    if not features_info or features_info[0] is None:
        raise ValueError(f"No audio features available for track {id!r}")
    features = features_info[0]

    # Order matters: it must line up with the column names used downstream.
    feature_keys = [
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'time_signature',
    ]

    track_data = [track_info['duration_ms'], track_info['popularity']]
    track_data.extend(features[key] for key in feature_keys)
    return track_data

# Column names, in the same order as the list returned by getTrackFeatures.
cols = [
    "length",
    "popularity",
    "acousticness",
    "danceability",
    "energy",
    "instrumentalness",
    "liveness",
    "loudness",
    "speechiness",
    "tempo",
    "time_signature",
]

TOPSONG_VAR = getTrackFeatures(TOPSONG_ID)

TOPSONG_VAR

[168601, 100, 0.299, 0.695, 0.54, 0, 0.367, -5.692, 0.0493, 121.932, 4]

In [31]:
# Put the feature list into a one-row DataFrame so it can be scaled.

TOPSONG_DF = pd.DataFrame(data=[TOPSONG_VAR], columns=cols)

TOPSONG_DF

Unnamed: 0,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,168601,100,0.299,0.695,0.54,0,0.367,-5.692,0.0493,121.932,4


In [32]:
# scale the variables of the song
# import scaler 
from sklearn.preprocessing import MinMaxScaler

import joblib

# Load the previously saved scaler from the 'scaler' file.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
scaler = joblib.load('scaler')

# Scale the song's features, then wrap the resulting array back into a labelled DataFrame.
TOPSONG_DF_SCALED = scaler.transform(TOPSONG_DF)
TOPSONG_DF_SCALED_DF = pd.DataFrame(data=TOPSONG_DF_SCALED, columns=cols)

TOPSONG_DF_SCALED_DF

Unnamed: 0,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature
0,0.091289,1.041667,0.3005,0.669725,0.529157,0.0,0.354606,0.873818,0.039153,0.369219,0.75


In [33]:
# Predict which cluster the scaled song falls into, using the saved model.
# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.

kmeans_fitted = joblib.load('spot_model')

CLUSTER_SONG = kmeans_fitted.predict(TOPSONG_DF_SCALED_DF)

CLUSTER_SONG



array([1])

In [34]:
# return a random song from the same cluster as the searched song

CLUSTERLIST = pd.read_csv('spot_cluster.csv')

# Use the cluster predicted for the searched song. Taking the cluster of the
# first CSV row instead would make the recommendation independent of the input.
# CLUSTER_SONG is the array returned by predict for the one-row input.
REC_SONG = CLUSTER_SONG[0]

REC_SONG_OUT = CLUSTERLIST.loc[CLUSTERLIST['clusters'] == REC_SONG]

REC_SONG_RAND = REC_SONG_OUT.sample()

REC_SONG_RAND

Unnamed: 0,name,album,artist,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,clusters
349,Cooler Than Me - Gigamesh Radio Edit,Cooler Than Me EP,Mike Posner,2009-12-15,274186,36,0.03,0.629,0.894,0.00192,0.45,-4.677,0.0578,97.484,3,1


In [40]:
topsong = input("Enter a song or artist you like : ")
# Strip surrounding whitespace so " Eminem " still matches and a
# whitespace-only answer is treated as empty input.
topsong = topsong.strip().lower()

# Look for input in list and give random output
if topsong == "":
    print('Please give your input')
elif (top100['title_lower'].eq(topsong) | top100['artist_lower'].eq(topsong)).any():
    # gives random sample if match on title or on artist
    print(top100[['title','artist']].sample(3))
else:
    # Not in the chart list: look the song up on Spotify and recommend a
    # song from the cluster its audio features are assigned to.
    TOPSONG_RES = sp.search(q=topsong, limit=1)
    TOPSONG_ITEMS = TOPSONG_RES['tracks']['items']
    if not TOPSONG_ITEMS:
        # Spotify had no hit either; avoid an IndexError on the empty list.
        print('No item found on Spotify')
    else:
        TOPSONG_ID = TOPSONG_ITEMS[0]['id']
        TOPSONG_VAR = getTrackFeatures(TOPSONG_ID)
        TOPSONG_DF = pd.DataFrame([TOPSONG_VAR],columns=cols)
        TOPSONG_DF_SCALED = scaler.transform(TOPSONG_DF)
        TOPSONG_DF_SCALED_DF = pd.DataFrame(TOPSONG_DF_SCALED,columns=cols)
        CLUSTER_SONG = kmeans_fitted.predict(TOPSONG_DF_SCALED_DF)
        CLUSTERLIST =  pd.read_csv('spot_cluster.csv')
        # Use the predicted cluster of the searched song, not the cluster of
        # the first CSV row, so the recommendation depends on the input.
        REC_SONG = CLUSTER_SONG[0]
        REC_SONG_OUT = CLUSTERLIST.loc[CLUSTERLIST['clusters'] == REC_SONG]
        REC_SONG_RAND = REC_SONG_OUT.sample()
        display(REC_SONG_RAND)
    
    

Enter a song or artist you like : bohemian rhapsody




Unnamed: 0,name,album,artist,release_date,length,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,time_signature,clusters
906,More Than a Feeling,Boston,Boston,1976,285133,80,0.00088,0.377,0.681,0.0023,0.0504,-8.039,0.0298,108.789,4,1
