In [2]:
import pickle

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import minmax_scale

Kaggle Link: https://www.kaggle.com/rodolfofigueroa/spotify-12m-songs

In [5]:
# Read the track-features CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='tracks_features.csv')
df.head(n=5)

Unnamed: 0,id,name,album,album_id,artists,artist_ids,track_number,disc_number,explicit,danceability,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year,release_date
0,7lmeHLHBe4nmXzuXc0HDjk,Testify,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],1,1,False,0.47,...,0.0727,0.0261,1.1e-05,0.356,0.503,117.906,210133,4.0,1999,1999-11-02
1,1wsRitfRRtWyEapl0q22o8,Guerrilla Radio,The Battle Of Los Angel,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],2,1,True,0.599,...,0.188,0.0129,7.1e-05,0.155,0.489,103.68,206200,4.0,1999,1999-11-02
2,1hR0fIFK2qRG3f3RF70pb7,Calm Like a Bomb,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],3,1,False,0.315,...,0.483,0.0234,2e-06,0.122,0.37,149.749,298893,4.0,1999,1999-11-02
3,2lbASgTSoDO7MTuLAXlTW0,Mic Check,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],4,1,True,0.44,...,0.237,0.163,4e-06,0.121,0.574,96.752,213640,4.0,1999,1999-11-02
4,1MQTmpYOZ6fcMQc56Hdo7T,Sleep Now In the Fyre,The Battle Of Los Angeles,2eia0myWFgoHuttJytCxgX,['Rage Against The Machine'],['2d0hyoQ5ynDBnkvAbJKORj'],5,1,False,0.426,...,0.0701,0.00162,0.105,0.0789,0.539,127.059,205600,4.0,1999,1999-11-02


We can recommend songs that are similar to a searched track by comparing song characteristics such as danceability, acousticness, instrumentalness, etc.

In [20]:
# We can use a KNN (Nearest Neighbors)

def create_knnmodel(X, number_neighbors):
    '''Build a NearestNeighbors model and fit it on X.

    Parameters
    ----------
    X : object with a ``shape`` attribute, accepted by ``NearestNeighbors.fit``
        Feature table to index; ``X.shape[0]`` is used as the row count.
    number_neighbors : int
        Desired number of neighbours. The model is configured with
        ``number_neighbors + 1``, capped at the number of rows in ``X``.

    Returns
    -------
    NearestNeighbors
        The fitted model.
    '''
    # One more than requested -- presumably so a query track that is itself
    # in X can be dropped from its own results (TODO confirm).  The cap at
    # the row count was added by the original author to avoid an error.
    neighbor_count = min(number_neighbors + 1, X.shape[0])
    knn = NearestNeighbors(n_neighbors=neighbor_count)
    knn.fit(X)
    return knn

# Feature table for the neighbour search: the audio-feature columns to compare,
# with the track id used as the row label so rows are easy to identify.
# NOTE(review): no scaling is applied here, and the preview shows duration_ms
# in the hundreds of thousands while most other columns lie in 0-1 -- confirm
# that comparing raw values is intended.
X = df[
    [
        'id',
        'acousticness',
        'danceability',
        'duration_ms',
        'energy',
        'instrumentalness',
        'key',
        'liveness',
        'loudness',
        'mode',
        'speechiness',
        'tempo',
        'time_signature',
    ]
].set_index('id')
X.head()

Unnamed: 0_level_0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
7lmeHLHBe4nmXzuXc0HDjk,0.0261,0.47,210133,0.978,1.1e-05,7,0.356,-5.399,1,0.0727,117.906,4.0
1wsRitfRRtWyEapl0q22o8,0.0129,0.599,206200,0.957,7.1e-05,11,0.155,-5.764,1,0.188,103.68,4.0
1hR0fIFK2qRG3f3RF70pb7,0.0234,0.315,298893,0.97,2e-06,7,0.122,-5.424,1,0.483,149.749,4.0
2lbASgTSoDO7MTuLAXlTW0,0.163,0.44,213640,0.967,4e-06,11,0.121,-5.83,0,0.237,96.752,4.0
1MQTmpYOZ6fcMQc56Hdo7T,0.00162,0.426,205600,0.929,0.105,2,0.0789,-6.729,1,0.0701,127.059,4.0


In [13]:
# Example query: the audio-features record of a single track, kept as a plain
# dict so it can be organised into a DataFrame.

q = dict(
    danceability=0.470,
    energy=0.978,
    key=7,
    loudness=-5.399,
    mode=1,
    speechiness=0.0727,
    acousticness=0.02610,
    instrumentalness=0.000011,
    liveness=0.3560,
    tempo=117.906,
    id='7lmeHLHBe4nmXzuXc0HDjk',
    type='audio_features',
    duration_ms=210133,
    time_signature=4,
)

In [17]:
# One-row DataFrame built from the example dict `q`: each listed key becomes a
# single-value column (in this order), and the track id becomes the row label.
r = pd.DataFrame(
    {
        column: [q[column]]
        for column in (
            'id',
            'acousticness',
            'danceability',
            'duration_ms',
            'energy',
            'instrumentalness',
            'key',
            'liveness',
            'loudness',
            'mode',
            'speechiness',
            'tempo',
            'time_signature',
        )
    }
).set_index('id')
r.head()

Unnamed: 0_level_0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
7lmeHLHBe4nmXzuXc0HDjk,0.0261,0.47,210133,0.978,1.1e-05,7,0.356,-5.399,1,0.0727,117.906,4


In [22]:
# Fit the nearest-neighbours model on the feature table with
# number_neighbors=10 (create_knnmodel configures one more than this,
# capped at the number of rows).
model = create_knnmodel(X, 10)
# In-memory pickled (bytes) copy of the fitted model.
# `pickle` is imported in the notebook's import cell at the top.
my_model = pickle.dumps(model)

In [28]:
# Persist the fitted model to disk as a pickle in the file 'default_model'
# (created in, or overwritten in, the current working directory).
with open('default_model', 'wb') as model_file:
    pickle.dump(model, model_file)

In [36]:
# Restore the model from the 'default_model' pickle file, rebinding `model`.
# pickle.load can execute arbitrary code while deserialising, so only load
# files that you created yourself or otherwise trust.
with open('default_model', 'rb') as model_file:
    model = pickle.load(model_file)
