# **Content-Based Recommendation Engine**

In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import pickle

In [2]:
# Load the Spotify audio-feature table from a path relative to the notebook's
# working directory; the bare name on the last line renders the frame as the
# cell output.
spotify_df = pd.read_csv('../data/spotify_audio_features.csv')
spotify_df

Unnamed: 0,artist_name,track_name,track_id,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,SOMI,DUMB DUMB,6dG2zPUOWXk3eMC7Hb3wh3,75,0.850,0.734,2,-6.415,1,0.0491,0.02210,0.000179,0.0728,0.4060,103.027,149214,4
1,ENHYPEN,FEVER,0UzymivvUH5s8z4PeWZJaK,82,0.600,0.713,6,-5.079,0,0.0592,0.06420,0.000000,0.1420,0.4710,72.999,172107,4
2,LISA,LALISA,2KZ3sNqPogEyMUUH6A5HFy,86,0.804,0.652,8,-6.282,1,0.0898,0.02670,0.000782,0.1180,0.8540,150.076,200690,4
3,SUSHI,Naruto Main Theme,4SWBuoXEtpFW0JfZTWgYkq,43,0.581,0.773,9,-5.588,0,0.0276,0.00258,0.001910,0.3260,0.0977,104.970,137143,4
4,LISA,MONEY,45OX2jjEw1l7lOFJfDP9fv,86,0.826,0.553,1,-10.121,0,0.2340,0.16300,0.000041,0.1350,0.4000,140.037,168228,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6631,Troye Sivan,Angel Baby,2m6Ko3CY1qXNNja8AlugNc,78,0.559,0.559,11,-6.425,1,0.0358,0.01450,0.000000,0.1380,0.3380,72.498,220780,4
6632,Troye Sivan,Easy (with Kacey Musgraves feat. Mark Ronson),4XWaUb7mKRcJR5RPp4IIDr,72,0.581,0.674,8,-7.090,1,0.0330,0.00187,0.000003,0.1280,0.4880,143.005,213572,4
6633,Troye Sivan,Dance To This (feat. Ariana Grande),2NjO87HyT80fsgejd3PLYW,69,0.736,0.756,11,-7.099,1,0.0470,0.00632,0.005770,0.1060,0.7120,117.016,231847,4
6634,Troye Sivan,My My My!,4NiehSBQthimPzRsVeOgCT,68,0.701,0.458,1,-7.749,1,0.0836,0.00407,0.000151,0.0450,0.4820,102.967,204727,4


In [3]:
# Summarise column dtypes and non-null counts to see which columns are numeric.
spotify_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6636 entries, 0 to 6635
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   artist_name       6636 non-null   object 
 1   track_name        6636 non-null   object 
 2   track_id          6636 non-null   object 
 3   popularity        6636 non-null   int64  
 4   danceability      6636 non-null   float64
 5   energy            6636 non-null   float64
 6   key               6636 non-null   int64  
 7   loudness          6636 non-null   float64
 8   mode              6636 non-null   int64  
 9   speechiness       6636 non-null   float64
 10  acousticness      6636 non-null   float64
 11  instrumentalness  6636 non-null   float64
 12  liveness          6636 non-null   float64
 13  valence           6636 non-null   float64
 14  tempo             6636 non-null   float64
 15  duration_ms       6636 non-null   int64  
 16  time_signature    6636 non-null   int64  


In [4]:
# Cosine-distance nearest-neighbour index fitted on every column from position 3
# onward; columns 0-2 hold the string identifiers and are left out.
# NOTE(review): the features are fitted unscaled, so large-magnitude columns
# (e.g. duration_ms) probably dominate the cosine metric - consider
# standardising them first; confirm against the intended notion of similarity.
model_neighbor = NearestNeighbors(
    n_neighbors=8,
    algorithm='auto',
    metric='cosine',
)
model_neighbor.fit(spotify_df.iloc[:, 3:])

NearestNeighbors(metric='cosine', n_neighbors=8)

# ***Inference***

In [5]:
# Query the fitted index with the track at row position 1 and ask for its 7
# nearest rows (the closest hit is normally the query row itself, distance 0).
# The double brackets in iloc[[1], 3:] keep the selection as a one-row
# DataFrame, so the query carries the same column names the model was fitted
# with (newer scikit-learn warns when a name-less array is passed to an
# estimator fitted on a DataFrame) and each column keeps its numeric dtype
# instead of the row being collapsed into a single mixed-type Series.
distances, indices = model_neighbor.kneighbors(spotify_df.iloc[[1], 3:], n_neighbors=7)
distances, indices

(array([[0.00000000e+00, 2.86257018e-10, 7.37239714e-10, 8.32891089e-10,
         1.13629040e-09, 1.16121257e-09, 1.52730528e-09]]),
 array([[   1, 4074, 6533, 2997, 2026,   57, 1321]]))

In [6]:
# Show the first two columns (artist_name, track_name) of the row at position 1,
# i.e. the track used as the query.
spotify_df.iloc[1, :2].to_list()

['ENHYPEN', 'FEVER']

In [7]:
# Row position of the seed track; must match the row passed to kneighbors().
query_pos = 1

# Name the seed track in the header. The frame has a plain RangeIndex, so
# formatting spotify_df.index[...] would only print the row number.
print('Recommendations for {0}:\n'.format(spotify_df.iloc[query_pos, :2].to_list()))

# kneighbors() returns positional row indices into the fitted frame, one row of
# results per query; there is a single query here, so take row 0 of each array.
rank = 0
for row_pos, dist in zip(indices[0], distances[0]):
    # Skip the seed track itself explicitly rather than assuming it is always
    # the first hit (exact duplicates can tie with it at distance 0).
    if row_pos == query_pos:
        continue
    rank += 1
    print('{0}: {1}, with distance of {2}:'.format(rank, spotify_df.iloc[row_pos, :2].to_list(), dist))

Recommendations for 1:

1: ['Travis Scott', 'HIGHEST IN THE ROOM'], with distance of 2.8625701808948634e-10:
2: ['Justin Bieber', 'Don’t Go (with Justin Bieber & Don Toliver)'], with distance of 7.37239713721749e-10:
3: ['XXXTENTACION', 'SAD!'], with distance of 8.328910894306318e-10:
4: ['Shawn Mendes', 'Treat You Better'], with distance of 1.1362903951095404e-09:
5: ['Justin Bieber', 'Peaches (feat. Daniel Caesar & Giveon)'], with distance of 1.1612125705440235e-09:
6: ['The Lumineers', 'Ophelia'], with distance of 1.527305282245095e-09:


# ***Saving the model***

In [8]:
# Serialise the fitted nearest-neighbour model to disk in binary mode; the
# context manager closes the file even if pickling fails.
# NOTE(review): presumably ../models already exists - open() does not create
# missing directories.
with open('../models/spotify-recommender.pkl', 'wb') as model_file:
    pickle.dump(model_neighbor, model_file)